D48500.diff
No OneTemporary
Actions

Size

237 KB

Referenced Files

None

Subscribers

None

D48500.diff
View Options

	diff --git a/contrib/cortex-strings/.gitignore b/contrib/cortex-strings/.gitignore
	deleted file mode 100644
	--- a/contrib/cortex-strings/.gitignore
	+++ /dev/null
	@@ -1,11 +0,0 @@
	-*.a
	-*.o
	-*.la
	-*.lo
	-*.png
	-*.pyc
	-.deps
	-.dirstamp
	-.libs
	-try-*
	-cache.txt
	diff --git a/contrib/cortex-strings/Makefile.am b/contrib/cortex-strings/Makefile.am
	deleted file mode 100644
	--- a/contrib/cortex-strings/Makefile.am
	+++ /dev/null
	@@ -1,327 +0,0 @@
	-# Copyright (c) 2011, Linaro Limited
	-# All rights reserved.
	-#
	-# Redistribution and use in source and binary forms, with or without
	-# modification, are permitted provided that the following conditions are met:
	-# * Redistributions of source code must retain the above copyright
	-# notice, this list of conditions and the following disclaimer.
	-# * Redistributions in binary form must reproduce the above copyright
	-# notice, this list of conditions and the following disclaimer in the
	-# documentation and/or other materials provided with the distribution.
	-# * Neither the name of the Linaro nor the
	-# names of its contributors may be used to endorse or promote products
	-# derived from this software without specific prior written permission.
	-#
	-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
	-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	-#
	-
	-# Top level Makefile for cortex-strings
	-
	-# Used to record the compiler version in the executables
	-COMPILER = $(shell $(CC) --version 2>&1 | head -n1)
	-
	-# The main library
	-lib_LTLIBRARIES = \
	- libcortex-strings.la
	-
	-## Test suite
	-check_PROGRAMS = \
	- tests/test-memchr \
	- tests/test-memcmp \
	- tests/test-memcpy \
	- tests/test-memmove \
	- tests/test-memset \
	- tests/test-strchr \
	- tests/test-strcmp \
	- tests/test-strcpy \
	- tests/test-strlen \
	- tests/test-strncmp \
	- tests/test-strnlen
	-
	-# Options for the tests
	-tests_cflags = -I$(srcdir)/tests $(AM_CFLAGS)
	-tests_ldadd = libcortex-strings.la
	-tests_test_memchr_LDADD = $(tests_ldadd)
	-tests_test_memchr_CFLAGS = $(tests_cflags)
	-tests_test_memcmp_LDADD = $(tests_ldadd)
	-tests_test_memcmp_CFLAGS = $(tests_cflags)
	-tests_test_memcpy_LDADD = $(tests_ldadd)
	-tests_test_memcpy_CFLAGS = $(tests_cflags)
	-tests_test_memmove_LDADD = $(tests_ldadd)
	-tests_test_memmove_CFLAGS = $(tests_cflags)
	-tests_test_memset_LDADD = $(tests_ldadd)
	-tests_test_memset_CFLAGS = $(tests_cflags)
	-tests_test_strchr_LDADD = $(tests_ldadd)
	-tests_test_strchr_CFLAGS = $(tests_cflags)
	-tests_test_strcmp_LDADD = $(tests_ldadd)
	-tests_test_strcmp_CFLAGS = $(tests_cflags)
	-tests_test_strcpy_LDADD = $(tests_ldadd)
	-tests_test_strcpy_CFLAGS = $(tests_cflags)
	-tests_test_strlen_LDADD = $(tests_ldadd)
	-tests_test_strlen_CFLAGS = $(tests_cflags)
	-tests_test_strncmp_LDADD = $(tests_ldadd)
	-tests_test_strncmp_CFLAGS = $(tests_cflags)
	-
	-TESTS = $(check_PROGRAMS)
	-
	-## Benchmarks
	-noinst_PROGRAMS = \
	- dhry \
	- dhry-native \
	- try-none \
	- try-this \
	- try-plain \
	- try-newlib-c \
	- try-bionic-c \
	- try-glibc-c
	-
	-# Good 'ol Dhrystone
	-dhry_SOURCES = \
	- benchmarks/dhry/dhry_1.c \
	- benchmarks/dhry/dhry_2.c \
	- benchmarks/dhry/dhry.h
	-
	-dhry_CFLAGS = -Dcompiler="\"$(COMPILER)\"" -Doptions="\"$(CFLAGS)\""
	-dhry_LDADD = libcortex-strings.la
	-
	-dhry_native_SOURCES = $(dhry_SOURCES)
	-dhry_native_CFLAGS = $(dhry_CFLAGS)
	-
	-# Benchmark harness
	-noinst_LIBRARIES = \
	- libmulti.a \
	- libbionic-c.a \
	- libglibc-c.a \
	- libnewlib-c.a \
	- libplain.a
	-
	-libmulti_a_SOURCES = \
	- benchmarks/multi/harness.c
	-
	-libmulti_a_CFLAGS = -DVERSION=\"$(VERSION)\" $(AM_CFLAGS)
	-
	-## Other architecture independant implementaions
	-libbionic_c_a_SOURCES = \
	- reference/bionic-c/bcopy.c \
	- reference/bionic-c/memchr.c \
	- reference/bionic-c/memcmp.c \
	- reference/bionic-c/memcpy.c \
	- reference/bionic-c/memset.c \
	- reference/bionic-c/strchr.c \
	- reference/bionic-c/strcmp.c \
	- reference/bionic-c/strcpy.c \
	- reference/bionic-c/strlen.c
	-
	-libglibc_c_a_SOURCES = \
	- reference/glibc-c/memchr.c \
	- reference/glibc-c/memcmp.c \
	- reference/glibc-c/memcpy.c \
	- reference/glibc-c/memset.c \
	- reference/glibc-c/strchr.c \
	- reference/glibc-c/strcmp.c \
	- reference/glibc-c/strcpy.c \
	- reference/glibc-c/strlen.c \
	- reference/glibc-c/wordcopy.c \
	- reference/glibc-c/memcopy.h \
	- reference/glibc-c/pagecopy.h
	-
	-libnewlib_c_a_SOURCES = \
	- reference/newlib-c/memchr.c \
	- reference/newlib-c/memcmp.c \
	- reference/newlib-c/memcpy.c \
	- reference/newlib-c/memset.c \
	- reference/newlib-c/strchr.c \
	- reference/newlib-c/strcmp.c \
	- reference/newlib-c/strcpy.c \
	- reference/newlib-c/strlen.c \
	- reference/newlib-c/shim.h
	-
	-libplain_a_SOURCES = \
	- reference/plain/memset.c \
	- reference/plain/memcpy.c \
	- reference/plain/strcmp.c \
	- reference/plain/strcpy.c
	-
	-try_none_SOURCES =
	-try_none_LDADD = libmulti.a -lrt
	-try_this_SOURCES =
	-try_this_LDADD = libmulti.a libcortex-strings.la -lrt
	-try_bionic_c_SOURCES =
	-try_bionic_c_LDADD = libmulti.a libbionic-c.a -lrt
	-try_glibc_c_SOURCES =
	-try_glibc_c_LDADD = libmulti.a libglibc-c.a -lrt
	-try_newlib_c_SOURCES =
	-try_newlib_c_LDADD = libmulti.a libnewlib-c.a -lrt
	-try_plain_SOURCES =
	-try_plain_LDADD = libmulti.a libplain.a -lrt
	-
	-# Architecture specific
	-
	-if HOST_AARCH32
	-
	-if WITH_NEON
	-# Pull in the NEON specific files
	-neon_bionic_a9_sources = \
	- reference/bionic-a9/memcpy.S \
	- reference/bionic-a9/memset.S
	-neon_bionic_a15_sources = \
	- reference/bionic-a15/memcpy.S \
	- reference/bionic-a15/memset.S
	-fpu_flags = -mfpu=neon
	-else
	-if WITH_VFP
	-fpu_flags = -mfpu=vfp
	-else
	-fpu_flags = -msoft-float
	-endif
	-endif
	-
	-# Benchmarks and example programs
	-noinst_PROGRAMS += \
	- try-bionic-a9 \
	- try-bionic-a15 \
	- try-csl \
	- try-glibc \
	- try-newlib \
	- try-newlib-xscale
	-
	-# Libraries used in the benchmarks and examples
	-noinst_LIBRARIES += \
	- libbionic-a9.a \
	- libbionic-a15.a \
	- libcsl.a \
	- libglibc.a \
	- libnewlib.a \
	- libnewlib-xscale.a
	-
	-# Main library
	-libcortex_strings_la_SOURCES = \
	- src/thumb-2/strcpy.c \
	- src/arm/memchr.S \
	- src/arm/strchr.S \
	- src/thumb-2/strlen.S \
	- src/arm/memset.S \
	- src/arm/memcpy.S \
	- src/arm/strcmp.S
	-
	-# Libraries containing the difference reference versions
	-libbionic_a9_a_SOURCES = \
	- $(neon_bionic_a9_sources) \
	- reference/bionic-a9/memcmp.S \
	- reference/bionic-a9/strcmp.S \
	- reference/bionic-a9/strcpy.S \
	- reference/bionic-a9/strlen.c
	-
	-libbionic_a9_a_CFLAGS = -Wa,-mimplicit-it=thumb
	-
	-libbionic_a15_a_SOURCES = \
	- $(neon_bionic_a15_sources) \
	- reference/bionic-a15/memcmp.S \
	- reference/bionic-a15/strcmp.S \
	- reference/bionic-a15/strcpy.S \
	- reference/bionic-a15/strlen.c
	-
	-libbionic_a15_a_CFLAGS = -Wa,-mimplicit-it=thumb
	-
	-libcsl_a_SOURCES = \
	- reference/csl/memcpy.c \
	- reference/csl/memset.c \
	- reference/csl/arm_asm.h
	-
	-libglibc_a_SOURCES = \
	- reference/glibc/memcpy.S \
	- reference/glibc/memset.S \
	- reference/glibc/strchr.S \
	- reference/glibc/strlen.S
	-
	-libnewlib_a_SOURCES = \
	- reference/newlib/memcpy.S \
	- reference/newlib/strcmp.S \
	- reference/newlib/strcpy.c \
	- reference/newlib/strlen.c \
	- reference/newlib/arm_asm.h \
	- reference/newlib/shim.h
	-
	-libnewlib_xscale_a_SOURCES = \
	- reference/newlib-xscale/memchr.c \
	- reference/newlib-xscale/memcpy.c \
	- reference/newlib-xscale/memset.c \
	- reference/newlib-xscale/strchr.c \
	- reference/newlib-xscale/strcmp.c \
	- reference/newlib-xscale/strcpy.c \
	- reference/newlib-xscale/strlen.c \
	- reference/newlib-xscale/xscale.h
	-
	-# Flags for the benchmark helpers
	-try_bionic_a9_SOURCES =
	-try_bionic_a9_LDADD = libmulti.a libbionic-a9.a -lrt
	-try_bionic_a15_SOURCES =
	-try_bionic_a15_LDADD = libmulti.a libbionic-a15.a -lrt
	-try_csl_SOURCES =
	-try_csl_LDADD = libmulti.a libcsl.a -lrt
	-try_glibc_SOURCES =
	-try_glibc_LDADD = libmulti.a libglibc.a -lrt
	-try_newlib_SOURCES =
	-try_newlib_LDADD = libmulti.a libnewlib.a -lrt
	-try_newlib_xscale_SOURCES =
	-try_newlib_xscale_LDADD = libmulti.a libnewlib-xscale.a -lrt
	-
	-AM_CPPFLAGS = $(fpu_flags)
	-AM_LDFLAGS = $(fpu_flags)
	-
	-endif
	-
	-# aarch64 specific
	-if HOST_AARCH64
	-
	-libcortex_strings_la_SOURCES = \
	- src/aarch64/memchr.S \
	- src/aarch64/memcmp.S \
	- src/aarch64/memcpy.S \
	- src/aarch64/memmove.S \
	- src/aarch64/memset.S \
	- src/aarch64/strchr.S \
	- src/aarch64/strchrnul.S \
	- src/aarch64/strcmp.S \
	- src/aarch64/strcpy.S \
	- src/aarch64/strlen.S \
	- src/aarch64/strncmp.S \
	- src/aarch64/strnlen.S
	-
	-endif
	-
	-libcortex_strings_la_LDFLAGS = -version-info 1:0:0
	-
	-AM_CFLAGS = \
	- -std=gnu99 -Wall \
	- -fno-builtin -fno-stack-protector -U_FORTIFY_SOURCE \
	- $(AM_CPPFLAGS)
	-
	-if WITH_SUBMACHINE
	-AM_CFLAGS += \
	- -mtune=$(submachine)
	-endif
	-
	-EXTRA_DIST = \
	- tests/hp-timing.h \
	- tests/test-string.h \
	- tests/test-skeleton.c \
	- scripts/add-license.sh \
	- scripts/bench.py \
	- scripts/fixup.py \
	- scripts/libplot.py \
	- scripts/plot-align.py \
	- scripts/plot.py \
	- scripts/plot-sizes.py \
	- scripts/plot-top.py \
	- scripts/trim.sh \
	- autogen.sh
	diff --git a/contrib/cortex-strings/README b/contrib/cortex-strings/README
	deleted file mode 100644
	--- a/contrib/cortex-strings/README
	+++ /dev/null
	@@ -1,111 +0,0 @@
	-= Cortex-A String Routines =
	-
	-This package contains optimised string routines including memcpy(), memset(),
	-strcpy(), strlen() for the ARM Cortex-A series of cores.
	-
	-Various implementations of these routines are provided, including generic
	-implementations for ARMv7-A cores with/without Neon, Thumb2 implementations
	-and generic implementations for cores supporting AArch64.
	-
	-== Getting started ==
	-First configure and then install libcortex-strings.so. To make other
	-applications use this library, either add -lcortex-strings to the link
	-command or use LD_PRELOAD to load the library into existing applications.
	-
	-Our intent is to get these routines into the common C libraries such
	-as GLIBC, Bionic, and Newlib. Your system may already include them!
	-
	-== Contents ==
	- * src/ contains the routines themselves
	- * tests/ contains the unit tests
	- * reference/ contains reference copies of other ARM-focused
	- implementations gathered from around the Internet
	- * benchmarks/ contains various benchmarks, tools, and scripts used to
	- check and report on the different implementations.
	-
	-The src directory contains different variants organised by the
	-implementation they run on and optional features used. For example:
	- * src/thumb-2 contains generic non-NEON routines for AArch32 (with Thumb-2).
	- * src/arm contains tuned routines for Cortex-A class processors.
	- * src/aarch64 contains generic routines for AArch64.
	- * src/thumb contains generic routines for armv6-M (with Thumb).
	-
	-== Reference versions ==
	-reference/ contains versions collected from various popular Open
	-Source libraries. These have been modified for use in benchmarking.
	-Please refer to the individual files for any licensing terms.
	-
	-The routines were collected from the following releases:
	- * EGLIBC 2.13
	- * Newlib 1.19.0
	- * Bionic android-2.3.5_r1
	-
	-== Licensing ==
	-All Linaro-authored routines are under the modified BSD license:
	-
	-Copyright (c) 2011, Linaro Limited
	-All rights reserved.
	-
	-Redistribution and use in source and binary forms, with or without
	-modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
	-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	-
	-All ARM-authored routines are under the modified BSD license:
	-
	-Copyright (c) 2014 ARM Ltd
	-All rights reserved.
	-
	-Redistribution and use in source and binary forms, with or without
	-modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
	-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	-
	-All third party routines are under a GPL compatible license.
	-
	-== Notes and Limitations ==
	-Some of the implementations have been collected from other
	-projects and have a variety of licenses and copyright holders.
	-
	-== Style ==
	-Assembly code attempts to follow the GLIBC coding convetions. They
	-are:
	- * Copyright headers in C style comment blocks
	- * Instructions indented with one tab
	- * Operands indented with one tab
	- * Text is wrapped at 70 characters
	- * End of line comments are fine
	diff --git a/contrib/cortex-strings/autogen.sh b/contrib/cortex-strings/autogen.sh
	deleted file mode 100755
	--- a/contrib/cortex-strings/autogen.sh
	+++ /dev/null
	@@ -1,69 +0,0 @@
	-#!/bin/sh
	-#
	-# autogen.sh glue for hplip
	-#
	-# HPLIP used to have five or so different autotools trees. Upstream
	-# has reduced it to two. Still, this script is capable of cleaning
	-# just about any possible mess of autoconf files.
	-#
	-# BE CAREFUL with trees that are not completely automake-generated,
	-# this script deletes all Makefile.in files it can find.
	-#
	-# Requires: automake 1.9, autoconf 2.57+
	-# Conflicts: autoconf 2.13
	-set -e
	-
	-# Refresh GNU autotools toolchain.
	-echo Cleaning autotools files...
	-find -type d -name autom4te.cache -print0 | xargs -0 rm -rf \;
	-find -type f \( -name missing -o -name install-sh -o -name mkinstalldirs \
	- -o -name depcomp -o -name ltmain.sh -o -name configure \
	- -o -name config.sub -o -name config.guess \
	- -o -name Makefile.in \) -print0 | xargs -0 rm -f
	-
	-echo Running autoreconf...
	-autoreconf --force --install
	-
	-# For the Debian package build
	-test -d debian && {
	- # link these in Debian builds
	- rm -f config.sub config.guess
	- ln -s /usr/share/misc/config.sub .
	- ln -s /usr/share/misc/config.guess .
	-
	- # refresh list of executable scripts, to avoid possible breakage if
	- # upstream tarball does not include the file or if it is mispackaged
	- # for whatever reason.
	- [ "$1" = "updateexec" ] && {
	- echo Generating list of executable files...
	- rm -f debian/executable.files
	- find -type f -perm +111 ! -name '.*' -fprint debian/executable.files
	- }
	-
	- # Remove any files in upstream tarball that we don't have in the Debian
	- # package (because diff cannot remove files)
	- version=`dpkg-parsechangelog | awk '/Version:/ { print $2 }' | sed -e 's/-[^-]\+$//'`
	- source=`dpkg-parsechangelog | awk '/Source:/ { print $2 }' | tr -d ' '`
	- if test -r ../${source}_${version}.orig.tar.gz ; then
	- echo Generating list of files that should be removed...
	- rm -f debian/deletable.files
	- touch debian/deletable.files
	- [ -e debian/tmp ] && rm -rf debian/tmp
	- mkdir debian/tmp
	- ( cd debian/tmp ; tar -zxf ../../../${source}_${version}.orig.tar.gz )
	- find debian/tmp/ -type f ! -name '.*' -print0 | xargs -0 -ri echo '{}' | \
	- while read -r i ; do
	- if test -e "${i}" ; then
	- filename=$(echo "${i}" | sed -e 's#.*debian/tmp/[^/]\+/##')
	- test -e "${filename}" || echo "${filename}" >>debian/deletable.files
	- fi
	- done
	- rm -fr debian/tmp
	- else
	- echo Emptying list of files that should be deleted...
	- rm -f debian/deletable.files
	- touch debian/deletable.files
	- fi
	-}
	-
	-exit 0
	diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry.h b/contrib/cortex-strings/benchmarks/dhry/dhry.h
	deleted file mode 100644
	--- a/contrib/cortex-strings/benchmarks/dhry/dhry.h
	+++ /dev/null
	@@ -1,311 +0,0 @@
	-/*
	- **************************************************************************
	- * DHRYSTONE 2.1 BENCHMARK PC VERSION
	- **************************************************************************
	- *
	- * "DHRYSTONE" Benchmark Program
	- * -----------------------------
	- *
	- * Version: C, Version 2.1
	- *
	- * File: dhry.h (part 1 of 3)
	- *
	- * Date: May 25, 1988
	- *
	- * Author: Reinhold P. Weicker
	- * Siemens AG, AUT E 51
	- * Postfach 3220
	- * 8520 Erlangen
	- * Germany (West)
	- * Phone: [+49]-9131-7-20330
	- * (8-17 Central European Time)
	- * Usenet: ..!mcsun!unido!estevax!weicker
	- *
	- * Original Version (in Ada) published in
	- * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984),
	- * pp. 1013 - 1030, together with the statistics
	- * on which the distribution of statements etc. is based.
	- *
	- * In this C version, the following C library functions are used:
	- * - strcpy, strcmp (inside the measurement loop)
	- * - printf, scanf (outside the measurement loop)
	- * In addition, Berkeley UNIX system calls "times ()" or "time ()"
	- * are used for execution time measurement. For measurements
	- * on other systems, these calls have to be changed.
	- *
	- * Collection of Results:
	- * Reinhold Weicker (address see above) and
	- *
	- * Rick Richardson
	- * PC Research. Inc.
	- * 94 Apple Orchard Drive
	- * Tinton Falls, NJ 07724
	- * Phone: (201) 389-8963 (9-17 EST)
	- * Usenet: ...!uunet!pcrat!rick
	- *
	- * Please send results to Rick Richardson and/or Reinhold Weicker.
	- * Complete information should be given on hardware and software used.
	- * Hardware information includes: Machine type, CPU, type and size
	- * of caches; for microprocessors: clock frequency, memory speed
	- * (number of wait states).
	- * Software information includes: Compiler (and runtime library)
	- * manufacturer and version, compilation switches, OS version.
	- * The Operating System version may give an indication about the
	- * compiler; Dhrystone itself performs no OS calls in the measurement
	- * loop.
	- *
	- * The complete output generated by the program should be mailed
	- * such that at least some checks for correctness can be made.
	- *
	- **************************************************************************
	- *
	- * This version has changes made by Roy Longbottom to conform to a common
	- * format for a series of standard benchmarks for PCs:
	- *
	- * Running time greater than 5 seconds due to inaccuracy of the PC clock.
	- *
	- * Automatic adjustment of run time, no manually inserted parameters.
	- *
	- * Initial display of calibration times to confirm linearity.
	- *
	- * Display of results within one screen (or at a slow speed as the test
	- * progresses) so that it can be seen to have run successfully.
	- *
	- * Facilities to type in details of system used etc.
	- *
	- * All results and details appended to a results file.
	- *
	- *
	- * Roy Longbottom
	- * 101323.2241@compuserve.com
	- *
	- **************************************************************************
	- *
	- * For details of history, changes, other defines, benchmark construction
	- * statistics see official versions from ftp.nosc.mil/pub/aburto where
	- * the latest table of results (dhry.tbl) are available. See also
	- * netlib@ornl.gov
	- *
	- **************************************************************************
	- *
	- * Defines: The following "Defines" are possible:
	- * -DREG=register (default: Not defined)
	- * As an approximation to what an average C programmer
	- * might do, the "register" storage class is applied
	- * (if enabled by -DREG=register)
	- * - for local variables, if they are used (dynamically)
	- * five or more times
	- * - for parameters if they are used (dynamically)
	- * six or more times
	- * Note that an optimal "register" strategy is
	- * compiler-dependent, and that "register" declarations
	- * do not necessarily lead to faster execution.
	- * -DNOSTRUCTASSIGN (default: Not defined)
	- * Define if the C compiler does not support
	- * assignment of structures.
	- * -DNOENUMS (default: Not defined)
	- * Define if the C compiler does not support
	- * enumeration types.
	- ***************************************************************************
	- *
	- * Compilation model and measurement (IMPORTANT):
	- *
	- * This C version of Dhrystone consists of three files:
	- * - dhry.h (this file, containing global definitions and comments)
	- * - dhry_1.c (containing the code corresponding to Ada package Pack_1)
	- * - dhry_2.c (containing the code corresponding to Ada package Pack_2)
	- *
	- * The following "ground rules" apply for measurements:
	- * - Separate compilation
	- * - No procedure merging
	- * - Otherwise, compiler optimizations are allowed but should be indicated
	- * - Default results are those without register declarations
	- * See the companion paper "Rationale for Dhrystone Version 2" for a more
	- * detailed discussion of these ground rules.
	- *
	- * For 16-Bit processors (e.g. 80186, 80286), times for all compilation
	- * models ("small", "medium", "large" etc.) should be given if possible,
	- * together with a definition of these models for the compiler system used.
	- *
	- **************************************************************************
	- * Examples of Pentium Results
	- *
	- * Dhrystone Benchmark Version 2.1 (Language: C)
	- *
	- * Month run 4/1996
	- * PC model Escom
	- * CPU Pentium
	- * Clock MHz 100
	- * Cache 256K
	- * Options Neptune chipset
	- * OS/DOS Windows 95
	- * Compiler Watcom C/ C++ 10.5 Win386
	- * OptLevel -otexan -zp8 -fp5 -5r
	- * Run by Roy Longbottom
	- * From UK
	- * Mail 101323.2241@compuserve.com
	- *
	- * Final values (* implementation-dependent):
	- *
	- * Int_Glob: O.K. 5
	- * Bool_Glob: O.K. 1
	- * Ch_1_Glob: O.K. A
	- * Ch_2_Glob: O.K. B
	- * Arr_1_Glob[8]: O.K. 7
	- * Arr_2_Glob8/7: O.K. 1600010
	- * Ptr_Glob->
	- * Ptr_Comp: * 98008
	- * Discr: O.K. 0
	- * Enum_Comp: O.K. 2
	- * Int_Comp: O.K. 17
	- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
	- * Next_Ptr_Glob->
	- * Ptr_Comp: * 98008 same as above
	- * Discr: O.K. 0
	- * Enum_Comp: O.K. 1
	- * Int_Comp: O.K. 18
	- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
	- * Int_1_Loc: O.K. 5
	- * Int_2_Loc: O.K. 13
	- * Int_3_Loc: O.K. 7
	- * Enum_Loc: O.K. 1
	- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
	- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
	- *
	- * Register option Selected.
	- *
	- * Microseconds 1 loop: 4.53
	- * Dhrystones / second: 220690
	- * VAX MIPS rating: 125.61
	- *
	- *
	- * Dhrystone Benchmark Version 2.1 (Language: C)
	- *
	- * Month run 4/1996
	- * PC model Escom
	- * CPU Pentium
	- * Clock MHz 100
	- * Cache 256K
	- * Options Neptune chipset
	- * OS/DOS Windows 95
	- * Compiler Watcom C/ C++ 10.5 Win386
	- * OptLevel No optimisation
	- * Run by Roy Longbottom
	- * From UK
	- * Mail 101323.2241@compuserve.com
	- *
	- * Final values (* implementation-dependent):
	- *
	- * Int_Glob: O.K. 5
	- * Bool_Glob: O.K. 1
	- * Ch_1_Glob: O.K. A
	- * Ch_2_Glob: O.K. B
	- * Arr_1_Glob[8]: O.K. 7
	- * Arr_2_Glob8/7: O.K. 320010
	- * Ptr_Glob->
	- * Ptr_Comp: * 98004
	- * Discr: O.K. 0
	- * Enum_Comp: O.K. 2
	- * Int_Comp: O.K. 17
	- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
	- * Next_Ptr_Glob->
	- * Ptr_Comp: * 98004 same as above
	- * Discr: O.K. 0
	- * Enum_Comp: O.K. 1
	- * Int_Comp: O.K. 18
	- * Str_Comp: O.K. DHRYSTONE PROGRAM, SOME STRING
	- * Int_1_Loc: O.K. 5
	- * Int_2_Loc: O.K. 13
	- * Int_3_Loc: O.K. 7
	- * Enum_Loc: O.K. 1
	- * Str_1_Loc: O.K. DHRYSTONE PROGRAM, 1'ST STRING
	- * Str_2_Loc: O.K. DHRYSTONE PROGRAM, 2'ND STRING
	- *
	- * Register option Not selected.
	- *
	- * Microseconds 1 loop: 20.06
	- * Dhrystones / second: 49844
	- * VAX MIPS rating: 28.37
	- *
	- **************************************************************************
	- */
	-
	-/* Compiler and system dependent definitions: */
	-
	-#ifndef TIME
	-#define TIMES
	-#endif
	- /* Use times(2) time function unless */
	- /* explicitly defined otherwise */
	-
	-#ifdef TIMES
	-/* #include <sys/types.h>
	- #include <sys/times.h> */
	- /* for "times" */
	-#endif
	-
	-#define Mic_secs_Per_Second 1000000.0
	- /* Berkeley UNIX C returns process times in seconds/HZ */
	-
	-#ifdef NOSTRUCTASSIGN
	-#define structassign(d, s) memcpy(&(d), &(s), sizeof(d))
	-#else
	-#define structassign(d, s) d = s
	-#endif
	-
	-#ifdef NOENUM
	-#define Ident_1 0
	-#define Ident_2 1
	-#define Ident_3 2
	-#define Ident_4 3
	-#define Ident_5 4
	- typedef int Enumeration;
	-#else
	- typedef enum {Ident_1, Ident_2, Ident_3, Ident_4, Ident_5}
	- Enumeration;
	-#endif
	- /* for boolean and enumeration types in Ada, Pascal */
	-
	-/* General definitions: */
	-
	-#include <stdio.h>
	-#include <string.h>
	-
	- /* for strcpy, strcmp */
	-
	-#define Null 0
	- /* Value of a Null pointer */
	-#define true 1
	-#define false 0
	-
	-typedef int One_Thirty;
	-typedef int One_Fifty;
	-typedef char Capital_Letter;
	-typedef int Boolean;
	-typedef char Str_30 [31];
	-typedef int Arr_1_Dim [50];
	-typedef int Arr_2_Dim [50] [50];
	-
	-typedef struct record
	- {
	- struct record *Ptr_Comp;
	- Enumeration Discr;
	- union {
	- struct {
	- Enumeration Enum_Comp;
	- int Int_Comp;
	- char Str_Comp [31];
	- } var_1;
	- struct {
	- Enumeration E_Comp_2;
	- char Str_2_Comp [31];
	- } var_2;
	- struct {
	- char Ch_1_Comp;
	- char Ch_2_Comp;
	- } var_3;
	- } variant;
	- } Rec_Type, *Rec_Pointer;
	-
	-
	-
	diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry_1.c b/contrib/cortex-strings/benchmarks/dhry/dhry_1.c
	deleted file mode 100644
	--- a/contrib/cortex-strings/benchmarks/dhry/dhry_1.c
	+++ /dev/null
	@@ -1,778 +0,0 @@
	-/*
	- *************************************************************************
	- *
	- * "DHRYSTONE" Benchmark Program
	- * -----------------------------
	- *
	- * Version: C, Version 2.1
	- *
	- * File: dhry_1.c (part 2 of 3)
	- *
	- * Date: May 25, 1988
	- *
	- * Author: Reinhold P. Weicker
	- *
	- *************************************************************************
	- */
	-
	- #include <time.h>
	- #include <stdlib.h>
	- #include <stdio.h>
	- #include "dhry.h"
	- /*COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER COMPILER*/
	-
	- #ifdef COW
	- #define compiler "Watcom C/C++ 10.5 Win386"
	- #define options " -otexan -zp8 -5r -ms"
	- #endif
	- #ifdef CNW
	- #define compiler "Watcom C/C++ 10.5 Win386"
	- #define options " No optimisation"
	- #endif
	- #ifdef COD
	- #define compiler "Watcom C/C++ 10.5 Dos4GW"
	- #define options " -otexan -zp8 -5r -ms"
	- #endif
	- #ifdef CND
	- #define compiler "Watcom C/C++ 10.5 Dos4GW"
	- #define options " No optimisation"
	- #endif
	- #ifdef CONT
	- #define compiler "Watcom C/C++ 10.5 Win32NT"
	- #define options " -otexan -zp8 -5r -ms"
	- #endif
	- #ifdef CNNT
	- #define compiler "Watcom C/C++ 10.5 Win32NT"
	- #define options " No optimisation"
	- #endif
	- #ifdef COO2
	- #define compiler "Watcom C/C++ 10.5 OS/2-32"
	- #define options " -otexan -zp8 -5r -ms"
	- #endif
	- #ifdef CNO2
	- #define compiler "Watcom C/C++ 10.5 OS/2-32"
	- #define options " No optimisation"
	- #endif
	-
	-
	-/* Global Variables: */
	-
	-Rec_Pointer Ptr_Glob,
	- Next_Ptr_Glob;
	-int Int_Glob;
	- Boolean Bool_Glob;
	- char Ch_1_Glob,
	- Ch_2_Glob;
	- int Arr_1_Glob [50];
	- int Arr_2_Glob [50] [50];
	- int getinput = 1;
	-
	-
	- char Reg_Define[100] = "Register option Selected.";
	-
	- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
	- Capital_Letter Ch_2_Par_Val);
	- /*
	- forward declaration necessary since Enumeration may not simply be int
	- */
	-
	- #ifndef ROPT
	- #define REG
	- /* REG becomes defined as empty */
	- /* i.e. no register variables */
	- #else
	- #define REG register
	- #endif
	-
	- void Proc_1 (REG Rec_Pointer Ptr_Val_Par);
	- void Proc_2 (One_Fifty *Int_Par_Ref);
	- void Proc_3 (Rec_Pointer *Ptr_Ref_Par);
	- void Proc_4 ();
	- void Proc_5 ();
	- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par);
	- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
	- One_Fifty *Int_Par_Ref);
	- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
	- int Int_1_Par_Val, int Int_2_Par_Val);
	-
	- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref);
	-
	-
	- /* variables for time measurement: */
	-
	- #define Too_Small_Time 2
	- /* Measurements should last at least 2 seconds */
	-
	- double Begin_Time,
	- End_Time,
	- User_Time;
	-
	- double Microseconds,
	- Dhrystones_Per_Second,
	- Vax_Mips;
	-
	- /* end of variables for time measurement */
	-
	-
	- void main (int argc, char *argv[])
	- /*****/
	-
	- /* main program, corresponds to procedures */
	- /* Main and Proc_0 in the Ada version */
	- {
	- double dtime();
	-
	- One_Fifty Int_1_Loc;
	- REG One_Fifty Int_2_Loc;
	- One_Fifty Int_3_Loc;
	- REG char Ch_Index;
	- Enumeration Enum_Loc;
	- Str_30 Str_1_Loc;
	- Str_30 Str_2_Loc;
	- REG int Run_Index;
	- REG int Number_Of_Runs;
	- int endit, count = 10;
	- FILE *Ap;
	- char general[9][80] = {" "};
	-
	- /* Initializations */
	- if (argc > 1)
	- {
	- switch (argv[1][0])
	- {
	- case 'N':
	- getinput = 0;
	- break;
	- case 'n':
	- getinput = 0;
	- break;
	- }
	- }
	-
	- if ((Ap = fopen("Dhry.txt","a+")) == NULL)
	- {
	- printf("Can not open Dhry.txt\n\n");
	- printf("Press any key\n");
	- exit(1);
	- }
	-
	-/***********************************************************************
	- * Change for compiler and optimisation used *
	- ***********************************************************************/
	-
	- Next_Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
	- Ptr_Glob = (Rec_Pointer) malloc (sizeof (Rec_Type));
	-
	- Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
	- Ptr_Glob->Discr = Ident_1;
	- Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
	- Ptr_Glob->variant.var_1.Int_Comp = 40;
	- strcpy (Ptr_Glob->variant.var_1.Str_Comp,
	- "DHRYSTONE PROGRAM, SOME STRING");
	- strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
	-
	- Arr_2_Glob [8][7] = 10;
	- /* Was missing in published program. Without this statement, */
	- /* Arr_2_Glob [8][7] would have an undefined value. */
	- /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
	- /* overflow may occur for this array element. */
	-
	- printf ("\n");
	- printf ("Dhrystone Benchmark, Version 2.1 (Language: C or C++)\n");
	- printf ("\n");
	-
	- if (getinput == 0)
	- {
	- printf ("No run time input data\n\n");
	- }
	- else
	- {
	- printf ("With run time input data\n\n");
	- }
	-
	- printf ("Compiler %s\n", compiler);
	- printf ("Optimisation %s\n", options);
	- #ifdef ROPT
	- printf ("Register option selected\n\n");
	- #else
	- printf ("Register option not selected\n\n");
	- strcpy(Reg_Define, "Register option Not selected.");
	- #endif
	-
	- /*
	- if (Reg)
	- {
	- printf ("Program compiled with 'register' attribute\n");
	- printf ("\n");
	- }
	- else
	- {
	- printf ("Program compiled without 'register' attribute\n");
	- printf ("\n");
	- }
	-
	- printf ("Please give the number of runs through the benchmark: ");
	- {
	- int n;
	- scanf ("%d", &n);
	- Number_Of_Runs = n;
	- }
	- printf ("\n");
	- printf ("Execution starts, %d runs through Dhrystone\n",
	- Number_Of_Runs);
	- */
	-
	- Number_Of_Runs = 5000;
	-
	- do
	- {
	-
	- Number_Of_Runs = Number_Of_Runs * 2;
	- count = count - 1;
	- Arr_2_Glob [8][7] = 10;
	-
	- /***************/
	- /* Start timer */
	- /***************/
	-
	- Begin_Time = dtime();
	-
	- for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
	- {
	-
	- Proc_5();
	- Proc_4();
	- /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
	- Int_1_Loc = 2;
	- Int_2_Loc = 3;
	- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
	- Enum_Loc = Ident_2;
	- Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
	- /* Bool_Glob == 1 */
	- while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
	- {
	- Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
	- /* Int_3_Loc == 7 */
	- Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
	- /* Int_3_Loc == 7 */
	- Int_1_Loc += 1;
	- } /* while */
	- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
	- Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
	- /* Int_Glob == 5 */
	- Proc_1 (Ptr_Glob);
	- for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
	- /* loop body executed twice */
	- {
	- if (Enum_Loc == Func_1 (Ch_Index, 'C'))
	- /* then, not executed */
	- {
	- Proc_6 (Ident_1, &Enum_Loc);
	- strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
	- Int_2_Loc = Run_Index;
	- Int_Glob = Run_Index;
	- }
	- }
	- /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
	- Int_2_Loc = Int_2_Loc * Int_1_Loc;
	- Int_1_Loc = Int_2_Loc / Int_3_Loc;
	- Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
	- /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
	- Proc_2 (&Int_1_Loc);
	- /* Int_1_Loc == 5 */
	-
	- } /* loop "for Run_Index" */
	-
	- /**************/
	- /* Stop timer */
	- /**************/
	-
	- End_Time = dtime();
	- User_Time = End_Time - Begin_Time;
	-
	- printf ("%12.0f runs %6.2f seconds \n",(double) Number_Of_Runs, User_Time);
	- if (User_Time > 5)
	- {
	- count = 0;
	- }
	- else
	- {
	- if (User_Time < 0.1)
	- {
	- Number_Of_Runs = Number_Of_Runs * 5;
	- }
	- }
	- } /* calibrate/run do while */
	- while (count >0);
	-
	- printf ("\n");
	- printf ("Final values (* implementation-dependent):\n");
	- printf ("\n");
	- printf ("Int_Glob: ");
	- if (Int_Glob == 5) printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Int_Glob);
	-
	- printf ("Bool_Glob: ");
	- if (Bool_Glob == 1) printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d\n", Bool_Glob);
	-
	- printf ("Ch_1_Glob: ");
	- if (Ch_1_Glob == 'A') printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%c ", Ch_1_Glob);
	-
	- printf ("Ch_2_Glob: ");
	- if (Ch_2_Glob == 'B') printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%c\n", Ch_2_Glob);
	-
	- printf ("Arr_1_Glob[8]: ");
	- if (Arr_1_Glob[8] == 7) printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Arr_1_Glob[8]);
	-
	- printf ("Arr_2_Glob8/7: ");
	- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%10d\n", Arr_2_Glob[8][7]);
	-
	- printf ("Ptr_Glob-> ");
	- printf (" Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
	-
	- printf (" Discr: ");
	- if (Ptr_Glob->Discr == 0) printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Ptr_Glob->Discr);
	-
	- printf ("Enum_Comp: ");
	- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
	-
	- printf (" Int_Comp: ");
	- if (Ptr_Glob->variant.var_1.Int_Comp == 17) printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Ptr_Glob->variant.var_1.Int_Comp);
	-
	- printf ("Str_Comp: ");
	- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
	- "DHRYSTONE PROGRAM, SOME STRING") == 0)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%s\n", Ptr_Glob->variant.var_1.Str_Comp);
	-
	- printf ("Next_Ptr_Glob-> ");
	- printf (" Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
	- printf (" same as above\n");
	-
	- printf (" Discr: ");
	- if (Next_Ptr_Glob->Discr == 0)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Next_Ptr_Glob->Discr);
	-
	- printf ("Enum_Comp: ");
	- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
	-
	- printf (" Int_Comp: ");
	- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Next_Ptr_Glob->variant.var_1.Int_Comp);
	-
	- printf ("Str_Comp: ");
	- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
	- "DHRYSTONE PROGRAM, SOME STRING") == 0)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
	-
	- printf ("Int_1_Loc: ");
	- if (Int_1_Loc == 5)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Int_1_Loc);
	-
	- printf ("Int_2_Loc: ");
	- if (Int_2_Loc == 13)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d\n", Int_2_Loc);
	-
	- printf ("Int_3_Loc: ");
	- if (Int_3_Loc == 7)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d ", Int_3_Loc);
	-
	- printf ("Enum_Loc: ");
	- if (Enum_Loc == 1)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%d\n", Enum_Loc);
	-
	- printf ("Str_1_Loc: ");
	- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%s\n", Str_1_Loc);
	-
	- printf ("Str_2_Loc: ");
	- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
	- printf ("O.K. ");
	- else printf ("WRONG ");
	- printf ("%s\n", Str_2_Loc);
	-
	- printf ("\n");
	-
	-
	- if (User_Time < Too_Small_Time)
	- {
	- printf ("Measured time too small to obtain meaningful results\n");
	- printf ("Please increase number of runs\n");
	- printf ("\n");
	- }
	- else
	- {
	- Microseconds = User_Time * Mic_secs_Per_Second
	- / (double) Number_Of_Runs;
	- Dhrystones_Per_Second = (double) Number_Of_Runs / User_Time;
	- Vax_Mips = Dhrystones_Per_Second / 1757.0;
	-
	- printf ("Microseconds for one run through Dhrystone: ");
	- printf ("%12.2lf \n", Microseconds);
	- printf ("Dhrystones per Second: ");
	- printf ("%10.0lf \n", Dhrystones_Per_Second);
	- printf ("VAX MIPS rating = ");
	- printf ("%12.2lf \n",Vax_Mips);
	- printf ("\n");
	-
	-/************************************************************************
	- * Type details of hardware, software etc. *
	- ************************************************************************/
	-
	- if (getinput == 1)
	- {
	- printf ("Enter the following which will be added with results to file DHRY.TXT\n");
	- printf ("When submitting a number of results you need only provide details once\n");
	- printf ("but a cross reference such as an abbreviated CPU type would be useful.\n");
	- printf ("You can kill (exit or close) the program now and no data will be added.\n\n");
	-
	- printf ("PC Supplier/model ? ");
	- gets(general[1]);
	-
	- printf ("CPU chip ? ");
	- gets(general[2]);
	-
	- printf ("Clock MHz ? ");
	- gets(general[3]);
	-
	- printf ("Cache size ? ");
	- gets(general[4]);
	-
	- printf ("Chipset & H/W options ? ");
	- gets(general[5]);
	-
	- printf ("OS/DOS version ? ");
	- gets(general[6]);
	-
	- printf ("Your name ? ");
	- gets(general[7]);
	-
	- printf ("Company/Location ? ");
	- gets(general[8]);
	-
	- printf ("E-mail address ? ");
	- gets(general[0]);
	- }
	-/************************************************************************
	- * Add results to output file Dhry.txt *
	- ************************************************************************/
	- fprintf (Ap, "-------------------- -----------------------------------"
	- "\n");
	- fprintf (Ap, "Dhrystone Benchmark Version 2.1 (Language: C++)\n\n");
	- fprintf (Ap, "PC model %s\n", general[1]);
	- fprintf (Ap, "CPU %s\n", general[2]);
	- fprintf (Ap, "Clock MHz %s\n", general[3]);
	- fprintf (Ap, "Cache %s\n", general[4]);
	- fprintf (Ap, "Options %s\n", general[5]);
	- fprintf (Ap, "OS/DOS %s\n", general[6]);
	- fprintf (Ap, "Compiler %s\n", compiler);
	- fprintf (Ap, "OptLevel %s\n", options);
	- fprintf (Ap, "Run by %s\n", general[7]);
	- fprintf (Ap, "From %s\n", general[8]);
	- fprintf (Ap, "Mail %s\n\n", general[0]);
	-
	- fprintf (Ap, "Final values (* implementation-dependent):\n");
	- fprintf (Ap, "\n");
	- fprintf (Ap, "Int_Glob: ");
	- if (Int_Glob == 5) fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Int_Glob);
	-
	- fprintf (Ap, "Bool_Glob: ");
	- if (Bool_Glob == 1) fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Bool_Glob);
	-
	- fprintf (Ap, "Ch_1_Glob: ");
	- if (Ch_1_Glob == 'A') fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%c\n", Ch_1_Glob);
	-
	- fprintf (Ap, "Ch_2_Glob: ");
	- if (Ch_2_Glob == 'B') fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%c\n", Ch_2_Glob);
	-
	- fprintf (Ap, "Arr_1_Glob[8]: ");
	- if (Arr_1_Glob[8] == 7) fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Arr_1_Glob[8]);
	-
	- fprintf (Ap, "Arr_2_Glob8/7: ");
	- if (Arr_2_Glob[8][7] == Number_Of_Runs + 10)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%10d\n", Arr_2_Glob[8][7]);
	-
	- fprintf (Ap, "Ptr_Glob-> \n");
	- fprintf (Ap, " Ptr_Comp: * %d\n", (int) Ptr_Glob->Ptr_Comp);
	-
	- fprintf (Ap, " Discr: ");
	- if (Ptr_Glob->Discr == 0) fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Ptr_Glob->Discr);
	-
	- fprintf (Ap, " Enum_Comp: ");
	- if (Ptr_Glob->variant.var_1.Enum_Comp == 2)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Enum_Comp);
	-
	- fprintf (Ap, " Int_Comp: ");
	- if (Ptr_Glob->variant.var_1.Int_Comp == 17) fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Ptr_Glob->variant.var_1.Int_Comp);
	-
	- fprintf (Ap, " Str_Comp: ");
	- if (strcmp(Ptr_Glob->variant.var_1.Str_Comp,
	- "DHRYSTONE PROGRAM, SOME STRING") == 0)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%s\n", Ptr_Glob->variant.var_1.Str_Comp);
	-
	- fprintf (Ap, "Next_Ptr_Glob-> \n");
	- fprintf (Ap, " Ptr_Comp: * %d", (int) Next_Ptr_Glob->Ptr_Comp);
	- fprintf (Ap, " same as above\n");
	-
	- fprintf (Ap, " Discr: ");
	- if (Next_Ptr_Glob->Discr == 0)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Next_Ptr_Glob->Discr);
	-
	- fprintf (Ap, " Enum_Comp: ");
	- if (Next_Ptr_Glob->variant.var_1.Enum_Comp == 1)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Enum_Comp);
	-
	- fprintf (Ap, " Int_Comp: ");
	- if (Next_Ptr_Glob->variant.var_1.Int_Comp == 18)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Next_Ptr_Glob->variant.var_1.Int_Comp);
	-
	- fprintf (Ap, " Str_Comp: ");
	- if (strcmp(Next_Ptr_Glob->variant.var_1.Str_Comp,
	- "DHRYSTONE PROGRAM, SOME STRING") == 0)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%s\n", Next_Ptr_Glob->variant.var_1.Str_Comp);
	-
	- fprintf (Ap, "Int_1_Loc: ");
	- if (Int_1_Loc == 5)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Int_1_Loc);
	-
	- fprintf (Ap, "Int_2_Loc: ");
	- if (Int_2_Loc == 13)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Int_2_Loc);
	-
	- fprintf (Ap, "Int_3_Loc: ");
	- if (Int_3_Loc == 7)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Int_3_Loc);
	-
	- fprintf (Ap, "Enum_Loc: ");
	- if (Enum_Loc == 1)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%d\n", Enum_Loc);
	-
	- fprintf (Ap, "Str_1_Loc: ");
	- if (strcmp(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING") == 0)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%s\n", Str_1_Loc);
	-
	- fprintf (Ap, "Str_2_Loc: ");
	- if (strcmp(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING") == 0)
	- fprintf (Ap, "O.K. ");
	- else fprintf (Ap, "WRONG ");
	- fprintf (Ap, "%s\n", Str_2_Loc);
	-
	-
	- fprintf (Ap, "\n");
	- fprintf(Ap,"%s\n",Reg_Define);
	- fprintf (Ap, "\n");
	- fprintf(Ap,"Microseconds 1 loop: %12.2lf\n",Microseconds);
	- fprintf(Ap,"Dhrystones / second: %10.0lf\n",Dhrystones_Per_Second);
	- fprintf(Ap,"VAX MIPS rating: %12.2lf\n\n",Vax_Mips);
	- fclose(Ap);
	- }
	-
	- printf ("\n");
	- printf ("A new results file will have been created in the same directory as the\n");
	- printf (".EXE files if one did not already exist. If you made a mistake on input, \n");
	- printf ("you can use a text editor to correct it, delete the results or copy \n");
	- printf ("them to a different file name. If you intend to run multiple tests you\n");
	- printf ("you may wish to rename DHRY.TXT with a more informative title.\n\n");
	- printf ("Please submit feedback and results files as a posting in Section 12\n");
	- printf ("or to Roy_Longbottom@compuserve.com\n\n");
	-
	- if (getinput == 1)
	- {
	- printf("Press any key to exit\n");
	- printf ("\nIf this is displayed you must close the window in the normal way\n");
	- }
	- }
	-
	-
	- void Proc_1 (REG Rec_Pointer Ptr_Val_Par)
	- /******************/
	-
	- /* executed once */
	- {
	- REG Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp;
	- /* == Ptr_Glob_Next */
	- /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */
	- /* corresponds to "rename" in Ada, "with" in Pascal */
	-
	- structassign (Ptr_Val_Par->Ptr_Comp, Ptr_Glob);
	- Ptr_Val_Par->variant.var_1.Int_Comp = 5;
	- Next_Record->variant.var_1.Int_Comp
	- = Ptr_Val_Par->variant.var_1.Int_Comp;
	- Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp;
	- Proc_3 (&Next_Record->Ptr_Comp);
	- /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp
	- == Ptr_Glob->Ptr_Comp */
	- if (Next_Record->Discr == Ident_1)
	- /* then, executed */
	- {
	- Next_Record->variant.var_1.Int_Comp = 6;
	- Proc_6 (Ptr_Val_Par->variant.var_1.Enum_Comp,
	- &Next_Record->variant.var_1.Enum_Comp);
	- Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp;
	- Proc_7 (Next_Record->variant.var_1.Int_Comp, 10,
	- &Next_Record->variant.var_1.Int_Comp);
	- }
	- else /* not executed */
	- structassign (Ptr_Val_Par, Ptr_Val_Par->Ptr_Comp);
	- } /* Proc_1 */
	-
	-
	- void Proc_2 (One_Fifty *Int_Par_Ref)
	- /******************/
	- /* executed once */
	- /* Int_Par_Ref == 1, becomes 4 /
	-
	- {
	- One_Fifty Int_Loc;
	- Enumeration Enum_Loc;
	-
	- Int_Loc = *Int_Par_Ref + 10;
	- do /* executed once */
	- if (Ch_1_Glob == 'A')
	- /* then, executed */
	- {
	- Int_Loc -= 1;
	- *Int_Par_Ref = Int_Loc - Int_Glob;
	- Enum_Loc = Ident_1;
	- } /* if */
	- while (Enum_Loc != Ident_1); /* true */
	- } /* Proc_2 */
	-
	-
	- void Proc_3 (Rec_Pointer *Ptr_Ref_Par)
	- /******************/
	- /* executed once */
	- /* Ptr_Ref_Par becomes Ptr_Glob */
	-
	- {
	- if (Ptr_Glob != Null)
	- /* then, executed */
	- *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp;
	- Proc_7 (10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp);
	- } /* Proc_3 */
	-
	-
	-void Proc_4 () /* without parameters */
	- /*******/
	- /* executed once */
	- {
	- Boolean Bool_Loc;
	-
	- Bool_Loc = Ch_1_Glob == 'A';
	- Bool_Glob = Bool_Loc \| Bool_Glob;
	- Ch_2_Glob = 'B';
	- } /* Proc_4 */
	-
	-
	- void Proc_5 () /* without parameters */
	- /*******/
	- /* executed once */
	- {
	- Ch_1_Glob = 'A';
	- Bool_Glob = false;
	- } /* Proc_5 */
	-
	-
	- /* Procedure for the assignment of structures, */
	- /* if the C compiler doesn't support this feature */
	- #ifdef NOSTRUCTASSIGN
	- memcpy (d, s, l)
	- register char *d;
	- register char *s;
	- register int l;
	- {
	- while (l--) d++ = s++;
	- }
	- #endif
	-
	-
	-double dtime()
	-{
	-
	- /* #include <ctype.h> */
	-
	- #define HZ CLOCKS_PER_SEC
	- clock_t tnow;
	-
	- double q;
	- tnow = clock();
	- q = (double)tnow / (double)HZ;
	- return q;
	-}
	diff --git a/contrib/cortex-strings/benchmarks/dhry/dhry_2.c b/contrib/cortex-strings/benchmarks/dhry/dhry_2.c
	deleted file mode 100644
	--- a/contrib/cortex-strings/benchmarks/dhry/dhry_2.c
	+++ /dev/null
	@@ -1,186 +0,0 @@
	- /*
	- *************************************************************************
	- *
	- * "DHRYSTONE" Benchmark Program
	- * -----------------------------
	- *
	- * Version: C, Version 2.1
	- *
	- * File: dhry_2.c (part 3 of 3)
	- *
	- * Date: May 25, 1988
	- *
	- * Author: Reinhold P. Weicker
	- *
	- *************************************************************************
	- */
	-
	- #include "dhry.h"
	-
	- #ifndef REG
	- #define REG
	- /* REG becomes defined as empty */
	- /* i.e. no register variables */
	- #else
	- #define REG register
	- #endif
	-
	- extern int Int_Glob;
	- extern char Ch_1_Glob;
	-
	- Boolean Func_3 (Enumeration Enum_Par_Val);
	-
	- void Proc_6 (Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par)
	- /*********************************/
	- /* executed once */
	- /* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */
	-
	- {
	- *Enum_Ref_Par = Enum_Val_Par;
	- if (! Func_3 (Enum_Val_Par))
	- /* then, not executed */
	- *Enum_Ref_Par = Ident_4;
	- switch (Enum_Val_Par)
	- {
	- case Ident_1:
	- *Enum_Ref_Par = Ident_1;
	- break;
	- case Ident_2:
	- if (Int_Glob > 100)
	- /* then */
	- *Enum_Ref_Par = Ident_1;
	- else *Enum_Ref_Par = Ident_4;
	- break;
	- case Ident_3: /* executed */
	- *Enum_Ref_Par = Ident_2;
	- break;
	- case Ident_4: break;
	- case Ident_5:
	- *Enum_Ref_Par = Ident_3;
	- break;
	- } /* switch */
	- } /* Proc_6 */
	-
	-
	- void Proc_7 (One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val,
	- One_Fifty *Int_Par_Ref)
	- /**********************************************/
	- /* executed three times */
	- /* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */
	- /* Int_Par_Ref becomes 7 */
	- /* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */
	- /* Int_Par_Ref becomes 17 */
	- /* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */
	- /* Int_Par_Ref becomes 18 */
	-
	- {
	- One_Fifty Int_Loc;
	-
	- Int_Loc = Int_1_Par_Val + 2;
	- *Int_Par_Ref = Int_2_Par_Val + Int_Loc;
	- } /* Proc_7 */
	-
	-
	- void Proc_8 (Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref,
	- int Int_1_Par_Val, int Int_2_Par_Val)
	- /*********************************************************************/
	- /* executed once */
	- /* Int_Par_Val_1 == 3 */
	- /* Int_Par_Val_2 == 7 */
	-
	- {
	- REG One_Fifty Int_Index;
	- REG One_Fifty Int_Loc;
	-
	- Int_Loc = Int_1_Par_Val + 5;
	- Arr_1_Par_Ref [Int_Loc] = Int_2_Par_Val;
	- Arr_1_Par_Ref [Int_Loc+1] = Arr_1_Par_Ref [Int_Loc];
	- Arr_1_Par_Ref [Int_Loc+30] = Int_Loc;
	- for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index)
	- Arr_2_Par_Ref [Int_Loc] [Int_Index] = Int_Loc;
	- Arr_2_Par_Ref [Int_Loc] [Int_Loc-1] += 1;
	- Arr_2_Par_Ref [Int_Loc+20] [Int_Loc] = Arr_1_Par_Ref [Int_Loc];
	- Int_Glob = 5;
	- } /* Proc_8 */
	-
	-
	- Enumeration Func_1 (Capital_Letter Ch_1_Par_Val,
	- Capital_Letter Ch_2_Par_Val)
	- /*************************************************/
	- /* executed three times */
	- /* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */
	- /* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */
	- /* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */
	-
	- {
	- Capital_Letter Ch_1_Loc;
	- Capital_Letter Ch_2_Loc;
	-
	- Ch_1_Loc = Ch_1_Par_Val;
	- Ch_2_Loc = Ch_1_Loc;
	- if (Ch_2_Loc != Ch_2_Par_Val)
	- /* then, executed */
	- return (Ident_1);
	- else /* not executed */
	- {
	- Ch_1_Glob = Ch_1_Loc;
	- return (Ident_2);
	- }
	- } /* Func_1 */
	-
	-
	- Boolean Func_2 (Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref)
	- /*************************************************/
	- /* executed once */
	- /* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */
	- /* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */
	-
	- {
	- REG One_Thirty Int_Loc;
	- Capital_Letter Ch_Loc;
	-
	- Int_Loc = 2;
	- while (Int_Loc <= 2) /* loop body executed once */
	- if (Func_1 (Str_1_Par_Ref[Int_Loc],
	- Str_2_Par_Ref[Int_Loc+1]) == Ident_1)
	- /* then, executed */
	- {
	- Ch_Loc = 'A';
	- Int_Loc += 1;
	- } /* if, while */
	- if (Ch_Loc >= 'W' && Ch_Loc < 'Z')
	- /* then, not executed */
	- Int_Loc = 7;
	- if (Ch_Loc == 'R')
	- /* then, not executed */
	- return (true);
	- else /* executed */
	- {
	- if (strcmp (Str_1_Par_Ref, Str_2_Par_Ref) > 0)
	- /* then, not executed */
	- {
	- Int_Loc += 7;
	- Int_Glob = Int_Loc;
	- return (true);
	- }
	- else /* executed */
	- return (false);
	- } /* if Ch_Loc */
	- } /* Func_2 */
	-
	-
	- Boolean Func_3 (Enumeration Enum_Par_Val)
	- /***************************/
	- /* executed once */
	- /* Enum_Par_Val == Ident_3 */
	-
	- {
	- Enumeration Enum_Loc;
	-
	- Enum_Loc = Enum_Par_Val;
	- if (Enum_Loc == Ident_3)
	- /* then, executed */
	- return (true);
	- else /* not executed */
	- return (false);
	- } /* Func_3 */
	diff --git a/contrib/cortex-strings/benchmarks/multi/harness.c b/contrib/cortex-strings/benchmarks/multi/harness.c
	deleted file mode 100644
	--- a/contrib/cortex-strings/benchmarks/multi/harness.c
	+++ /dev/null
	@@ -1,407 +0,0 @@
	-/*
	- * Copyright (c) 2011, Linaro Limited
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions are met:
	- * * Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * * Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * * Neither the name of the Linaro nor the
	- * names of its contributors may be used to endorse or promote products
	- * derived from this software without specific prior written permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
	- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/** A simple harness that times how long a string function takes to
	- * run.
	- */
	-
	-/* PENDING: Add EPL */
	-
	-#include <string.h>
	-#include <time.h>
	-#include <stdint.h>
	-#include <stdlib.h>
	-#include <stdio.h>
	-#include <stdbool.h>
	-#include <assert.h>
	-#include <unistd.h>
	-#include <errno.h>
	-
	-#define NUM_ELEMS(_x) (sizeof(_x) / sizeof((_x)[0]))
	-
	-#ifndef VERSION
	-#define VERSION "(unknown version)"
	-#endif
	-
	-/** Make sure a function is called by using the return value */
	-#define SPOIL(_x) volatile long x = (long)(_x); (void)x
	-
	-/** Type of functions that can be tested */
	-typedef void (stub_t)(void dest, void *src, size_t n);
	-
	-/** Meta data about one test */
	-struct test
	-{
	- /** Test name */
	- const char *name;
	- /** Function to test */
	- stub_t stub;
	-};
	-
	-/** Flush the cache by reading a chunk of memory */
	-static void empty(volatile char *against)
	-{
	- /* We know that there's a 16 k cache with 64 byte lines giving
	- a total of 256 lines. Read randomly from 256*5 places should
	- flush everything */
	- int offset = (1024 - 256)*1024;
	-
	- for (int i = offset; i < offset + 1610243; i += 64)
	- {
	- against[i];
	- }
	-}
	-
	-/** Stub that does nothing. Used for calibrating */
	-static void xbounce(void dest, void src, size_t n)
	-{
	- SPOIL(0);
	-}
	-
	-/** Stub that calls memcpy */
	-static void xmemcpy(void dest, void src, size_t n)
	-{
	- SPOIL(memcpy(dest, src, n));
	-}
	-
	-/** Stub that calls memset */
	-static void xmemset(void dest, void src, size_t n)
	-{
	- SPOIL(memset(dest, 0, n));
	-}
	-
	-/** Stub that calls memcmp */
	-static void xmemcmp(void dest, void src, size_t n)
	-{
	- SPOIL(memcmp(dest, src, n));
	-}
	-
	-/** Stub that calls strcpy */
	-static void xstrcpy(void dest, void src, size_t n)
	-{
	- SPOIL(strcpy(dest, src));
	-}
	-
	-/** Stub that calls strlen */
	-static void xstrlen(void dest, void src, size_t n)
	-{
	- SPOIL(strlen(dest));
	-}
	-
	-/** Stub that calls strcmp */
	-static void xstrcmp(void dest, void src, size_t n)
	-{
	- SPOIL(strcmp(dest, src));
	-}
	-
	-/** Stub that calls strchr */
	-static void xstrchr(void dest, void src, size_t n)
	-{
	- /* Put the character at the end of the string and before the null */
	- ((char *)src)[n-1] = 32;
	- SPOIL(strchr(src, 32));
	-}
	-
	-/** Stub that calls memchr */
	-static void xmemchr(void dest, void src, size_t n)
	-{
	- /* Put the character at the end of the block */
	- ((char *)src)[n-1] = 32;
	- SPOIL(memchr(src, 32, n));
	-}
	-
	-/** All functions that can be tested */
	-static const struct test tests[] =
	- {
	- { "bounce", xbounce },
	- { "memchr", xmemchr },
	- { "memcpy", xmemcpy },
	- { "memset", xmemset },
	- { "memcmp", xmemcmp },
	- { "strchr", xstrchr },
	- { "strcmp", xstrcmp },
	- { "strcpy", xstrcpy },
	- { "strlen", xstrlen },
	- { NULL }
	- };
	-
	-/** Show basic usage */
	-static void usage(const char* name)
	-{
	- printf("%s %s: run a string related benchmark.\n"
	- "usage: %s [-c block-size] [-l loop-count] [-a alignment\|src_alignment:dst_alignment] [-f] [-t test-name] [-r run-id]\n"
	- , name, VERSION, name);
	-
	- printf("Tests:");
	-
	- for (const struct test *ptest = tests; ptest->name != NULL; ptest++)
	- {
	- printf(" %s", ptest->name);
	- }
	-
	- printf("\n");
	-
	- exit(-1);
	-}
	-
	-/** Find the test by name */
	-static const struct test find_test(const char name)
	-{
	- if (name == NULL)
	- {
	- return tests + 0;
	- }
	- else
	- {
	- for (const struct test *p = tests; p->name != NULL; p++)
	- {
	- if (strcmp(p->name, name) == 0)
	- {
	- return p;
	- }
	- }
	- }
	-
	- return NULL;
	-}
	-
	-#define MIN_BUFFER_SIZE 1024*1024
	-#define MAX_ALIGNMENT 256
	-
	-/** Take a pointer and ensure that the lower bits == alignment */
	-static char realign(char p, int alignment)
	-{
	- uintptr_t pp = (uintptr_t)p;
	- pp = (pp + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
	- pp += alignment;
	-
	- return (char *)pp;
	-}
	-
	-static int parse_int_arg(const char arg, const char exe_name)
	-{
	- long int ret;
	-
	- errno = 0;
	- ret = strtol(arg, NULL, 0);
	-
	- if (errno)
	- {
	- usage(exe_name);
	- }
	-
	- return (int)ret;
	-}
	-
	-static void parse_alignment_arg(const char arg, const char exe_name,
	- int src_alignment, int dst_alignment)
	-{
	- long int ret;
	- char *endptr;
	-
	- errno = 0;
	- ret = strtol(arg, &endptr, 0);
	-
	- if (errno)
	- {
	- usage(exe_name);
	- }
	-
	- *src_alignment = (int)ret;
	-
	- if (ret > 256 \|\| ret < 1)
	- {
	- printf("Alignment should be in the range [1, 256].\n");
	- usage(exe_name);
	- }
	-
	- if (ret == 256)
	- ret = 0;
	-
	- if (endptr && *endptr == ':')
	- {
	- errno = 0;
	- ret = strtol(endptr + 1, NULL, 0);
	-
	- if (errno)
	- {
	- usage(exe_name);
	- }
	-
	- if (ret > 256 \|\| ret < 1)
	- {
	- printf("Alignment should be in the range [1, 256].\n");
	- usage(exe_name);
	- }
	-
	- if (ret == 256)
	- ret = 0;
	- }
	-
	- *dst_alignment = (int)ret;
	-}
	-
	-/** Setup and run a test */
	-int main(int argc, char **argv)
	-{
	- /* Size of src and dest buffers */
	- size_t buffer_size = MIN_BUFFER_SIZE;
	-
	- /* Number of bytes per call */
	- int count = 31;
	- /* Number of times to run */
	- int loops = 10000000;
	- /* True to flush the cache each time */
	- int flush = 0;
	- /* Name of the test */
	- const char *name = NULL;
	- /* Alignment of buffers */
	- int src_alignment = 8;
	- int dst_alignment = 8;
	- /* Name of the run */
	- const char *run_id = "0";
	-
	- int opt;
	-
	- while ((opt = getopt(argc, argv, "c:l:ft:r:hva:")) > 0)
	- {
	- switch (opt)
	- {
	- case 'c':
	- count = parse_int_arg(optarg, argv[0]);
	- break;
	- case 'l':
	- loops = parse_int_arg(optarg, argv[0]);
	- break;
	- case 'a':
	- parse_alignment_arg(optarg, argv[0], &src_alignment, &dst_alignment);
	- break;
	- case 'f':
	- flush = 1;
	- break;
	- case 't':
	- name = strdup(optarg);
	- break;
	- case 'r':
	- run_id = strdup(optarg);
	- break;
	- case 'h':
	- usage(argv[0]);
	- break;
	- default:
	- usage(argv[0]);
	- break;
	- }
	- }
	-
	- /* Find the test by name */
	- const struct test *ptest = find_test(name);
	-
	- if (ptest == NULL)
	- {
	- usage(argv[0]);
	- }
	-
	- if (count + MAX_ALIGNMENT * 2 > MIN_BUFFER_SIZE)
	- {
	- buffer_size = count + MAX_ALIGNMENT * 2;
	- }
	-
	- /* Buffers to read and write from */
	- char *src = malloc(buffer_size);
	- char *dest = malloc(buffer_size);
	-
	- assert(src != NULL && dest != NULL);
	-
	- src = realign(src, src_alignment);
	- dest = realign(dest, dst_alignment);
	-
	- /* Fill the buffer with non-zero, reproducable random data */
	- srandom(1539);
	-
	- for (int i = 0; i < buffer_size; i++)
	- {
	- src[i] = (char)random() \| 1;
	- dest[i] = src[i];
	- }
	-
	- /* Make sure the buffers are null terminated for any string tests */
	- src[count] = 0;
	- dest[count] = 0;
	-
	- struct timespec start, end;
	- int err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start);
	- assert(err == 0);
	-
	- /* Preload */
	- stub_t stub = ptest->stub;
	-
	- /* Run two variants to reduce the cost of testing for the flush */
	- if (flush == 0)
	- {
	- for (int i = 0; i < loops; i++)
	- {
	- (*stub)(dest, src, count);
	- }
	- }
	- else
	- {
	- for (int i = 0; i < loops; i++)
	- {
	- (*stub)(dest, src, count);
	- empty(dest);
	- }
	- }
	-
	- err = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end);
	- assert(err == 0);
	-
	- /* Drop any leading path and pull the variant name out of the executable */
	- char *variant = strrchr(argv[0], '/');
	-
	- if (variant == NULL)
	- {
	- variant = argv[0];
	- }
	-
	- variant = strstr(variant, "try-");
	- assert(variant != NULL);
	-
	- double elapsed = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9;
	- /* Estimate the bounce time. Measured on a Panda. */
	- double bounced = 0.448730 * loops / 50000000;
	-
	- /* Dump both machine and human readable versions */
	- printf("%s:%s:%u:%u:%d:%d:%s:%.6f: took %.6f s for %u calls to %s of %u bytes. ~%.3f MB/s corrected.\n",
	- variant + 4, ptest->name,
	- count, loops, src_alignment, dst_alignment, run_id,
	- elapsed,
	- elapsed, loops, ptest->name, count,
	- (double)loops*count/(elapsed - bounced)/(1024*1024));
	-
	- return 0;
	-}
	diff --git a/contrib/cortex-strings/configure.ac b/contrib/cortex-strings/configure.ac
	deleted file mode 100644
	--- a/contrib/cortex-strings/configure.ac
	+++ /dev/null
	@@ -1,88 +0,0 @@
	-# Copyright (c) 2011-2012, Linaro Limited
	-# All rights reserved.
	-#
	-# Redistribution and use in source and binary forms, with or without
	-# modification, are permitted provided that the following conditions are met:
	-# * Redistributions of source code must retain the above copyright
	-# notice, this list of conditions and the following disclaimer.
	-# * Redistributions in binary form must reproduce the above copyright
	-# notice, this list of conditions and the following disclaimer in the
	-# documentation and/or other materials provided with the distribution.
	-# * Neither the name of the Linaro nor the
	-# names of its contributors may be used to endorse or promote products
	-# derived from this software without specific prior written permission.
	-#
	-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
	-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	-
	-AC_INIT(cortex-strings, 1.1-2012.06~dev)
	-AM_INIT_AUTOMAKE(foreign subdir-objects color-tests dist-bzip2)
	-AC_CONFIG_HEADERS([config.h])
	-AC_CONFIG_FILES(Makefile)
	-AC_CANONICAL_HOST
	-AM_PROG_AS
	-AC_PROG_CC
	-AC_PROG_LIBTOOL
	-
	-default_submachine=
	-
	-case $host in
	-aarch64*-*-*)
	- arch=aarch64
	- ;;
	-arm*-*-*)
	- arch=aarch32
	- default_submachine=cortex-a9
	- ;;
	-x86_64-*-*-*)
	- arch=generic
	- ;;
	-*)
	- AC_MSG_ERROR([unknown architecture $host])
	- ;;
	-esac
	-
	-AM_CONDITIONAL([HOST_AARCH32], [test x$arch = xaarch32])
	-AM_CONDITIONAL([HOST_AARCH64], [test x$arch = xaarch64])
	-AM_CONDITIONAL([HOST_GENERIC], [test x$arch = xgeneric])
	-
	-AC_ARG_WITH([cpu],
	- AS_HELP_STRING([--with-cpu=CPU],
	- [select code for CPU variant @<:@default=cortex-a9@:>@]]),
	- [dnl
	- case "$withval" in
	- yes|'') AC_MSG_ERROR([--with-cpu requires an argument]) ;;
	- no) ;;
	- *) submachine="$withval" ;;
	- esac
	-],
	-[submachine=$default_submachine])
	-
	-AC_SUBST(submachine)
	-AM_CONDITIONAL([WITH_SUBMACHINE], [test x$submachine != x])
	-
	-AC_ARG_WITH([neon],
	- AC_HELP_STRING([--with-neon],
	- [include NEON specific routines @<:@default=yes@:>@]),
	- [with_neon=$withval],
	- [with_neon=yes])
	-AC_SUBST(with_neon)
	-AM_CONDITIONAL(WITH_NEON, test x$with_neon = xyes)
	-
	-AC_ARG_WITH([vfp],
	- AC_HELP_STRING([--with-vfp],
	- [include VFP specific routines @<:@default=yes@:>@]),
	- [with_vfp=$withval],
	- [with_vfp=yes])
	-AC_SUBST(with_vfp)
	-AM_CONDITIONAL(WITH_VFP, test x$with_vfp = xyes)
	-
	-AC_OUTPUT
	diff --git a/contrib/cortex-strings/scripts/add-license.sh b/contrib/cortex-strings/scripts/add-license.sh
	deleted file mode 100755
	--- a/contrib/cortex-strings/scripts/add-license.sh
	+++ /dev/null
	@@ -1,79 +0,0 @@
	-#!/bin/bash
	-#
	-# Add the modified BSD license to a file
	-#
	-
	-f=`mktemp -d`
	-trap "rm -rf $f" EXIT
	-
	-year=`date +%Y`
	-cat > $f/original <<EOF
	-Copyright (c) $year, Linaro Limited
	-All rights reserved.
	-
	-Redistribution and use in source and binary forms, with or without
	-modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
	-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	-EOF
	-
	-# Translate it to C style
	-echo "/*" > $f/c
	-sed -r 's/(.*)/ * \1/' $f/original | sed -r 's/ +$//' >> $f/c
	-echo " */" >> $f/c
	-echo >> $f/c
	-
	-# ...and shell style
	-sed -r 's/(.*)/# \1/' $f/original | sed -r 's/ +$//' >> $f/shell
	-echo '#' >> $f/shell
	-echo >> $f/shell
	-
	-for name in $@; do
	- if grep -q Copyright $name; then
	- echo $name already has some type of copyright
	- continue
	- fi
	-
	- case $name in
	- # These files don't have an explicit license
	- autogen.sh)
	- continue;;
	- reference/newlib/*)
	- continue;;
	- reference/newlib-xscale/*)
	- continue;;
	- */dhry/*)
	- continue;;
	-
	- *.c)
	- src=$f/c
	- ;;
	- *.sh|*.am|*.ac)
	- src=$f/shell
	- ;;
	- *)
	- echo Unrecognied extension on $name
	- continue
	- esac
	-
	- cat $src $name > $f/next
	- mv $f/next $name
	- echo Updated $name
	-done
	diff --git a/contrib/cortex-strings/scripts/bench.py b/contrib/cortex-strings/scripts/bench.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/bench.py
	+++ /dev/null
	@@ -1,175 +0,0 @@
	-#!/usr/bin/env python
	-
	-"""Simple harness that benchmarks different variants of the routines,
	-caches the results, and emits all of the records at the end.
	-
	-Results are generated for different values of:
	- * Source
	- * Routine
	- * Length
	- * Alignment
	-"""
	-
	-import argparse
	-import subprocess
	-import math
	-import sys
	-
	-# Prefix to the executables
	-build = '../build/try-'
	-
	-ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'
	-
	-HAS = {
	- 'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
	- 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
	- 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
	- 'bionic-c': ALL,
	- 'csl': 'memcpy memset',
	- 'glibc': 'memcpy memset strchr strlen',
	- 'glibc-c': ALL,
	- 'newlib': 'memcpy strcmp strcpy strlen',
	- 'newlib-c': ALL,
	- 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
	- 'plain': 'memset memcpy strcmp strcpy',
	-}
	-
	-BOUNCE_ALIGNMENTS = ['1']
	-SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
	-DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']
	-
	-ALIGNMENTS = {
	- 'bounce': BOUNCE_ALIGNMENTS,
	- 'memchr': SINGLE_BUFFER_ALIGNMENTS,
	- 'memset': SINGLE_BUFFER_ALIGNMENTS,
	- 'strchr': SINGLE_BUFFER_ALIGNMENTS,
	- 'strlen': SINGLE_BUFFER_ALIGNMENTS,
	- 'memcmp': DUAL_BUFFER_ALIGNMENTS,
	- 'memcpy': DUAL_BUFFER_ALIGNMENTS,
	- 'strcmp': DUAL_BUFFER_ALIGNMENTS,
	- 'strcpy': DUAL_BUFFER_ALIGNMENTS,
	-}
	-
	-VARIANTS = sorted(HAS.keys())
	-FUNCTIONS = sorted(ALIGNMENTS.keys())
	-
	-NUM_RUNS = 5
	-
	-def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
	- """Perform a single run, exercising the cache as appropriate."""
	- key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id))
	-
	- if key in cache:
	- got = cache[key]
	- else:
	- xbuild = build
	- cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals()
	-
	- try:
	- got = subprocess.check_output(cmd.split()).strip()
	- except OSError, ex:
	- assert False, 'Error %s while running %s' % (ex, cmd)
	-
	- parts = got.split(':')
	- took = float(parts[7])
	-
	- cache[key] = got
	-
	- if not quiet:
	- print got
	- sys.stdout.flush()
	-
	- return took
	-
	-def run_many(cache, variants, bytes, all_functions):
	- # We want the data to come out in a useful order. So fix an
	- # alignment and function, and do all sizes for a variant first
	- bytes = sorted(bytes)
	- mid = bytes[int(len(bytes)/1.5)]
	-
	- if not all_functions:
	- # Use the ordering in 'this' as the default
	- all_functions = HAS['this'].split()
	-
	- # Find all other functions
	- for functions in HAS.values():
	- for function in functions.split():
	- if function not in all_functions:
	- all_functions.append(function)
	-
	- for function in all_functions:
	- for alignment in ALIGNMENTS[function]:
	- for variant in variants:
	- if function not in HAS[variant].split():
	- continue
	-
	- # Run a tracer through and see how long it takes and
	- # adjust the number of loops based on that. Not great
	- # for memchr() and similar which are O(n), but it will
	- # do
	- f = 50000000
	- want = 5.0
	-
	- loops = int(f / math.sqrt(max(1, mid)))
	- took = run(cache, variant, function, mid, loops, alignment, 0,
	- quiet=True)
	- # Keep it reasonable for silly routines like bounce
	- factor = min(20, max(0.05, want/took))
	- f = f * factor
	-
	- # Round f to a few significant figures
	- scale = 10**int(math.log10(f) - 1)
	- f = scale*int(f/scale)
	-
	- for b in sorted(bytes):
	- # Figure out the number of loops to give a roughly consistent run
	- loops = int(f / math.sqrt(max(1, b)))
	- for run_id in range(0, NUM_RUNS):
	- run(cache, variant, function, b, loops, alignment,
	- run_id)
	-
	-def run_top(cache):
	- parser = argparse.ArgumentParser()
	- parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS)
	- parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS)
	- parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default = 512*1024)
	- args = parser.parse_args()
	-
	- # Test all powers of 2
	- step1 = 2.0
	- # Test intermediate powers of 1.4
	- step2 = 1.4
	-
	- bytes = []
	-
	- for step in [step1, step2]:
	- if step:
	- # Figure out how many steps get us up to the top
	- steps = int(round(math.log(args.limit) / math.log(step)))
	- bytes.extend([int(step**x) for x in range(0, steps+1)])
	-
	- run_many(cache, args.variants, bytes, args.functions)
	-
	-def main():
	- cachename = 'cache.txt'
	-
	- cache = {}
	-
	- try:
	- with open(cachename) as f:
	- for line in f:
	- line = line.strip()
	- parts = line.split(':')
	- cache[':'.join(parts[:7])] = line
	- except:
	- pass
	-
	- try:
	- run_top(cache)
	- finally:
	- with open(cachename, 'w') as f:
	- for line in sorted(cache.values()):
	- print >> f, line
	-
	-if __name__ == '__main__':
	- main()
	diff --git a/contrib/cortex-strings/scripts/fixup.py b/contrib/cortex-strings/scripts/fixup.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/fixup.py
	+++ /dev/null
	@@ -1,27 +0,0 @@
	-"""Simple script that enables target specific blocks based on the first argument.
	-
	-Matches comment blocks like this:
	-
	-/* For Foo: abc
	-def
	-*/
	-
	-and de-comments them giving:
	-abc
	-def
	-"""
	-import re
	-import sys
	-
	-def main():
	- key = sys.argv[1]
	- expr = re.compile(r'/\* For %s:\s([^*]+)\*/' % key, re.M)
	-
	- for arg in sys.argv[2:]:
	- with open(arg) as f:
	- body = f.read()
	- with open(arg, 'w') as f:
	- f.write(expr.sub(r'\1', body))
	-
	-if __name__ == '__main__':
	- main()
	diff --git a/contrib/cortex-strings/scripts/libplot.py b/contrib/cortex-strings/scripts/libplot.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/libplot.py
	+++ /dev/null
	@@ -1,78 +0,0 @@
	-"""Shared routines for the plotters."""
	-
	-import fileinput
	-import collections
	-
	-Record = collections.namedtuple('Record', 'variant function bytes loops src_alignment dst_alignment run_id elapsed rest')
	-
	-
	-def make_colours():
	- return iter('m b g r c y k pink orange brown grey'.split())
	-
	-def parse_value(v):
	- """Turn text into a primitive"""
	- try:
	- if '.' in v:
	- return float(v)
	- else:
	- return int(v)
	- except ValueError:
	- return v
	-
	-def create_column_tuple(record, names):
	- cols = [getattr(record, name) for name in names]
	- return tuple(cols)
	-
	-def unique(records, name, prefer=''):
	- """Return the unique values of a column in the records"""
	- if type(name) == tuple:
	- values = list(set(create_column_tuple(x, name) for x in records))
	- else:
	- values = list(set(getattr(x, name) for x in records))
	-
	- if not values:
	- return values
	- elif type(values[0]) == str:
	- return sorted(values, key=lambda x: '%-06d|%s' % (-prefer.find(x), x))
	- else:
	- return sorted(values)
	-
	-def alignments_equal(alignments):
	- for alignment in alignments:
	- if alignment[0] != alignment[1]:
	- return False
	- return True
	-
	-def parse_row(line):
	- return Record(*[parse_value(y) for y in line.split(':')])
	-
	-def parse():
	- """Parse a record file into named tuples, correcting for loop
	- overhead along the way.
	- """
	- records = [parse_row(x) for x in fileinput.input()]
	-
	- # Pull out any bounce values
	- costs = {}
	-
	- for record in [x for x in records if x.function=='bounce']:
	- costs[(record.bytes, record.loops)] = record.elapsed
	-
	- # Fix up all of the records for cost
	- out = []
	-
	- for record in records:
	- if record.function == 'bounce':
	- continue
	-
	- cost = costs.get((record.bytes, record.loops), None)
	-
	- if not cost:
	- out.append(record)
	- else:
	- # Unfortunately you can't update a namedtuple...
	- values = list(record)
	- values[-2] -= cost
	- out.append(Record(*values))
	-
	- return out
	diff --git a/contrib/cortex-strings/scripts/plot-align.py b/contrib/cortex-strings/scripts/plot-align.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/plot-align.py
	+++ /dev/null
	@@ -1,67 +0,0 @@
	-#!/usr/bin/env python
	-
	-"""Plot the performance of different variants of one routine versus alignment.
	-"""
	-
	-import libplot
	-
	-import pylab
	-
	-
	-def plot(records, bytes, function):
	- records = [x for x in records if x.bytes==bytes and x.function==function]
	-
	- variants = libplot.unique(records, 'variant', prefer='this')
	- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
	-
	- X = pylab.arange(len(alignments))
	- width = 1.0/(len(variants)+1)
	-
	- colours = libplot.make_colours()
	-
	- pylab.figure(1).set_size_inches((16, 12))
	- pylab.clf()
	-
	- for i, variant in enumerate(variants):
	- heights = []
	-
	- for alignment in alignments:
	- matches = [x for x in records if x.variant==variant and x.src_alignment==alignment[0] and x.dst_alignment==alignment[1]]
	-
	- if matches:
	- vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
	- match in matches]
	- mean = sum(vals)/len(vals)
	- heights.append(mean)
	- else:
	- heights.append(0)
	-
	- pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant)
	-
	-
	- axes = pylab.axes()
	- if libplot.alignments_equal(alignments):
	- alignment_labels = ["%s" % x[0] for x in alignments]
	- else:
	- alignment_labels = ["%s:%s" % (x[0], x[1]) for x in alignments]
	- axes.set_xticklabels(alignment_labels)
	- axes.set_xticks(X + 0.5)
	-
	- pylab.title('Performance of different variants of %(function)s for %(bytes)d byte blocks' % locals())
	- pylab.xlabel('Alignment')
	- pylab.ylabel('Rate (MB/s)')
	- pylab.legend(loc='lower right', ncol=3)
	- pylab.grid()
	- pylab.savefig('alignment-%(function)s-%(bytes)d.png' % locals(), dpi=72)
	-
	-def main():
	- records = libplot.parse()
	-
	- for function in libplot.unique(records, 'function'):
	- for bytes in libplot.unique(records, 'bytes'):
	- plot(records, bytes, function)
	-
	- pylab.show()
	-
	-if __name__ == '__main__':
	- main()
	diff --git a/contrib/cortex-strings/scripts/plot-sizes.py b/contrib/cortex-strings/scripts/plot-sizes.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/plot-sizes.py
	+++ /dev/null
	@@ -1,120 +0,0 @@
	-#!/usr/bin/env python
	-
	-"""Plot the performance for different block sizes of one function across
	-variants.
	-"""
	-
	-import libplot
	-
	-import pylab
	-import pdb
	-import math
	-
	-def pretty_kb(v):
	- if v < 1024:
	- return '%d' % v
	- else:
	- if v % 1024 == 0:
	- return '%d k' % (v//1024)
	- else:
	- return '%.1f k' % (v/1024)
	-
	-def plot(records, function, alignment=None, scale=1):
	- variants = libplot.unique(records, 'variant', prefer='this')
	- records = [x for x in records if x.function==function]
	-
	- if alignment != None:
	- records = [x for x in records if x.src_alignment==alignment[0] and
	- x.dst_alignment==alignment[1]]
	-
	- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
	- if len(alignments) != 1:
	- return False
	- if libplot.alignments_equal(alignments):
	- aalignment = alignments[0][0]
	- else:
	- aalignment = "%s:%s" % (alignments[0][0], alignments[0][1])
	-
	- bytes = libplot.unique(records, 'bytes')[0]
	-
	- colours = libplot.make_colours()
	- all_x = []
	-
	- pylab.figure(1).set_size_inches((6.4*scale, 4.8*scale))
	- pylab.clf()
	-
	- if 'str' in function:
	- # The harness fills out to 16k. Anything past that is an
	- # early match
	- top = 16384
	- else:
	- top = 2**31
	-
	- for variant in variants:
	- matches = [x for x in records if x.variant==variant and x.bytes <= top]
	- matches.sort(key=lambda x: x.bytes)
	-
	- X = sorted(list(set([x.bytes for x in matches])))
	- Y = []
	- Yerr = []
	- for xbytes in X:
	- vals = [x.bytes*x.loops/x.elapsed/(1024*1024) for x in matches if x.bytes == xbytes]
	- if len(vals) > 1:
	- mean = sum(vals)/len(vals)
	- Y.append(mean)
	- if len(Yerr) == 0:
	- Yerr = [[], []]
	- err1 = max(vals) - mean
	- assert err1 >= 0
	- err2 = min(vals) - mean
	- assert err2 <= 0
	- Yerr[0].append(abs(err2))
	- Yerr[1].append(err1)
	- else:
	- Y.append(vals[0])
	-
	- all_x.extend(X)
	- colour = colours.next()
	-
	- if X:
	- pylab.plot(X, Y, c=colour)
	- if len(Yerr) > 0:
	- pylab.errorbar(X, Y, yerr=Yerr, c=colour, label=variant, fmt='o')
	- else:
	- pylab.scatter(X, Y, c=colour, label=variant, edgecolors='none')
	-
	- pylab.legend(loc='upper left', ncol=3, prop={'size': 'small'})
	- pylab.grid()
	- pylab.title('%(function)s of %(aalignment)s byte aligned blocks' % locals())
	- pylab.xlabel('Size (B)')
	- pylab.ylabel('Rate (MB/s)')
	-
	- # Figure out how high the range goes
	- top = max(all_x)
	-
	- power = int(round(math.log(max(all_x)) / math.log(2)))
	-
	- pylab.semilogx()
	-
	- pylab.axes().set_xticks([2**x for x in range(0, power+1)])
	- pylab.axes().set_xticklabels([pretty_kb(2**x) for x in range(0, power+1)])
	- pylab.xlim(0, top)
	- pylab.ylim(0, pylab.ylim()[1])
	- return True
	-
	-def main():
	- records = libplot.parse()
	-
	- functions = libplot.unique(records, 'function')
	- alignments = libplot.unique(records, ('src_alignment', 'dst_alignment'))
	-
	- for function in functions:
	- for alignment in alignments:
	- for scale in [1, 2.5]:
	- if plot(records, function, alignment, scale):
	- pylab.savefig('sizes-%s-%02d-%02d-%.1f.png' % (function, alignment[0], alignment[1], scale), dpi=72)
	-
	- pylab.show()
	-
	-if __name__ == '__main__':
	- main()
	diff --git a/contrib/cortex-strings/scripts/plot-top.py b/contrib/cortex-strings/scripts/plot-top.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/plot-top.py
	+++ /dev/null
	@@ -1,61 +0,0 @@
	-#!/usr/bin/env python
	-
	-"""Plot the performance of different variants of the string routines
	-for one size.
	-"""
	-
	-import libplot
	-
	-import pylab
	-
	-
	-def plot(records, bytes):
	- records = [x for x in records if x.bytes==bytes]
	-
	- variants = libplot.unique(records, 'variant', prefer='this')
	- functions = libplot.unique(records, 'function')
	-
	- X = pylab.arange(len(functions))
	- width = 1.0/(len(variants)+1)
	-
	- colours = libplot.make_colours()
	-
	- pylab.figure(1).set_size_inches((16, 12))
	- pylab.clf()
	-
	- for i, variant in enumerate(variants):
	- heights = []
	-
	- for function in functions:
	- matches = [x for x in records if x.variant==variant and x.function==function and x.src_alignment==8]
	-
	- if matches:
	- vals = [match.bytes*match.loops/match.elapsed/(1024*1024) for
	- match in matches]
	- mean = sum(vals)/len(vals)
	- heights.append(mean)
	- else:
	- heights.append(0)
	-
	- pylab.bar(X+i*width, heights, width, color=colours.next(), label=variant)
	-
	- axes = pylab.axes()
	- axes.set_xticklabels(functions)
	- axes.set_xticks(X + 0.5)
	-
	- pylab.title('Performance of different variants for %d byte blocks' % bytes)
	- pylab.ylabel('Rate (MB/s)')
	- pylab.legend(loc='upper left', ncol=3)
	- pylab.grid()
	- pylab.savefig('top-%06d.png' % bytes, dpi=72)
	-
	-def main():
	- records = libplot.parse()
	-
	- for bytes in libplot.unique(records, 'bytes'):
	- plot(records, bytes)
	-
	- pylab.show()
	-
	-if __name__ == '__main__':
	- main()
	diff --git a/contrib/cortex-strings/scripts/plot.py b/contrib/cortex-strings/scripts/plot.py
	deleted file mode 100644
	--- a/contrib/cortex-strings/scripts/plot.py
	+++ /dev/null
	@@ -1,123 +0,0 @@
	-"""Plot the results for each test. Spits out a set of images into the
	-current directory.
	-"""
	-
	-import libplot
	-
	-import fileinput
	-import collections
	-import pprint
	-
	-import pylab
	-
	-Record = collections.namedtuple('Record', 'variant test size loops src_alignment dst_alignment run_id rawtime comment time bytes rate')
	-
	-def unique(rows, name):
	- """Takes a list of values, pulls out the named field, and returns
	- a list of the unique values of this field.
	- """
	- return sorted(set(getattr(x, name) for x in rows))
	-
	-def to_float(v):
	- """Convert a string into a better type.
	-
	- >>> to_float('foo')
	- 'foo'
	- >>> to_float('1.23')
	- 1.23
	- >>> to_float('45')
	- 45
	- """
	- try:
	- if '.' in v:
	- return float(v)
	- else:
	- return int(v)
	- except:
	- return v
	-
	-def parse():
	- # Split the input up
	- rows = [x.strip().split(':') for x in fileinput.input()]
	- # Automatically turn numbers into the base type
	- rows = [[to_float(y) for y in x] for x in rows]
	-
	- # Scan once to calculate the overhead
	- r = [Record(*(x + [0, 0, 0])) for x in rows]
	- bounces = pylab.array([(x.loops, x.rawtime) for x in r if x.test == 'bounce'])
	- fit = pylab.polyfit(bounces[:,0], bounces[:,1], 1)
	-
	- records = []
	-
	- for row in rows:
	- # Make a dummy record so we can use the names
	- r1 = Record(*(row + [0, 0, 0]))
	-
	- bytes = r1.size * r1.loops
	- # Calculate the bounce time
	- delta = pylab.polyval(fit, [r1.loops])
	- time = r1.rawtime - delta
	- rate = bytes / time
	-
	- records.append(Record(*(row + [time, bytes, rate])))
	-
	- return records
	-
	-def plot(records, field, scale, ylabel):
	- variants = unique(records, 'variant')
	- tests = unique(records, 'test')
	-
	- colours = libplot.make_colours()
	-
	- # A little hack. We want the 'all' record to be drawn last so
	- # that it's obvious on the graph. Assume that no tests come
	- # before it alphabetically
	- variants.reverse()
	-
	- for test in tests:
	- for variant in variants:
	- v = [x for x in records if x.test==test and x.variant==variant]
	- v.sort(key=lambda x: x.size)
	- V = pylab.array([(x.size, getattr(x, field)) for x in v])
	-
	- # Ensure our results appear
	- order = 1 if variant == 'this' else 0
	-
	- try:
	- # A little hack. We want the 'all' to be obvious on
	- # the graph
	- if variant == 'all':
	- pylab.scatter(V[:,0], V[:,1]/scale, label=variant)
	- pylab.plot(V[:,0], V[:,1]/scale)
	- else:
	- pylab.plot(V[:,0], V[:,1]/scale, label=variant,
	- zorder=order, c = colours.next())
	-
	- except Exception, ex:
	- # michaelh1 likes to run this script while the test is
	- # still running which can lead to bad data
	- print ex, 'on %s of %s' % (variant, test)
	-
	- pylab.legend(loc='lower right', ncol=2, prop={'size': 'small'})
	- pylab.xlabel('Block size (B)')
	- pylab.ylabel(ylabel)
	- pylab.title('%s %s' % (test, field))
	- pylab.grid()
	-
	- pylab.savefig('%s-%s.png' % (test, field), dpi=100)
	- pylab.semilogx(basex=2)
	- pylab.savefig('%s-%s-semilog.png' % (test, field), dpi=100)
	- pylab.clf()
	-
	-def test():
	- import doctest
	- doctest.testmod()
	-
	-def main():
	- records = parse()
	-
	- plot(records, 'rate', 1024**2, 'Rate (MB/s)')
	- plot(records, 'time', 1, 'Total time (s)')
	-
	-if __name__ == '__main__':
	- main()
	diff --git a/contrib/cortex-strings/scripts/trim.sh b/contrib/cortex-strings/scripts/trim.sh
	deleted file mode 100755
	--- a/contrib/cortex-strings/scripts/trim.sh
	+++ /dev/null
	@@ -1,9 +0,0 @@
	-#!/bin/bash
	-#
	-# Trims the whitespace from around any given images
	-#
	-
	-for i in $@; do
	- convert $i -bordercolor white -border 1x1 -trim +repage -alpha off +dither -colors 32 PNG8:next-$i
	- mv next-$i $i
	-done
	diff --git a/contrib/cortex-strings/src/aarch64/memchr.S b/contrib/cortex-strings/src/aarch64/memchr.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/memchr.S
	+++ /dev/null
	@@ -1,172 +0,0 @@
	-/*
	- * memchr - find a character in a memory zone
	- *
	- * Copyright (c) 2014, ARM Limited
	- * All rights Reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions are met:
	- * * Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * * Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * * Neither the name of the company nor the names of its contributors
	- * may be used to endorse or promote products derived from this
	- * software without specific prior written permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- * Neon Available.
	- */
	-
	-/* Arguments and results. */
	-#define srcin x0
	-#define chrin w1
	-#define cntin x2
	-
	-#define result x0
	-
	-#define src x3
	-#define tmp x4
	-#define wtmp2 w5
	-#define synd x6
	-#define soff x9
	-#define cntrem x10
	-
	-#define vrepchr v0
	-#define vdata1 v1
	-#define vdata2 v2
	-#define vhas_chr1 v3
	-#define vhas_chr2 v4
	-#define vrepmask v5
	-#define vend v6
	-
	-/*
	- * Core algorithm:
	- *
	- * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
	- * per byte. For each tuple, bit 0 is set if the relevant byte matched the
	- * requested character and bit 1 is not used (faster than using a 32bit
	- * syndrome). Since the bits in the syndrome reflect exactly the order in which
	- * things occur in the original string, counting trailing zeros allows to
	- * identify exactly which byte has matched.
	- */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-def_fn memchr
	- /* Do not dereference srcin if no bytes to compare. */
	- cbz cntin, .Lzero_length
	- /*
	- * Magic constant 0x40100401 allows us to identify which lane matches
	- * the requested byte.
	- */
	- mov wtmp2, #0x0401
	- movk wtmp2, #0x4010, lsl #16
	- dup vrepchr.16b, chrin
	- /* Work with aligned 32-byte chunks */
	- bic src, srcin, #31
	- dup vrepmask.4s, wtmp2
	- ands soff, srcin, #31
	- and cntrem, cntin, #31
	- b.eq .Lloop
	-
	- /*
	- * Input string is not 32-byte aligned. We calculate the syndrome
	- * value for the aligned 32 bytes block containing the first bytes
	- * and mask the irrelevant part.
	- */
	-
	- ld1 {vdata1.16b, vdata2.16b}, [src], #32
	- sub tmp, soff, #32
	- adds cntin, cntin, tmp
	- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
	- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
	- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	- addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
	- addp vend.16b, vend.16b, vend.16b /* 128->64 */
	- mov synd, vend.d[0]
	- /* Clear the soff*2 lower bits */
	- lsl tmp, soff, #1
	- lsr synd, synd, tmp
	- lsl synd, synd, tmp
	- /* The first block can also be the last */
	- b.ls .Lmasklast
	- /* Have we found something already? */
	- cbnz synd, .Ltail
	-
	-.Lloop:
	- ld1 {vdata1.16b, vdata2.16b}, [src], #32
	- subs cntin, cntin, #32
	- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
	- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
	- /* If we're out of data we finish regardless of the result */
	- b.ls .Lend
	- /* Use a fast check for the termination condition */
	- orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
	- addp vend.2d, vend.2d, vend.2d
	- mov synd, vend.d[0]
	- /* We're not out of data, loop if we haven't found the character */
	- cbz synd, .Lloop
	-
	-.Lend:
	- /* Termination condition found, let's calculate the syndrome value */
	- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	- addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
	- addp vend.16b, vend.16b, vend.16b /* 128->64 */
	- mov synd, vend.d[0]
	- /* Only do the clear for the last possible block */
	- b.hi .Ltail
	-
	-.Lmasklast:
	- /* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
	- add tmp, cntrem, soff
	- and tmp, tmp, #31
	- sub tmp, tmp, #32
	- neg tmp, tmp, lsl #1
	- lsl synd, synd, tmp
	- lsr synd, synd, tmp
	-
	-.Ltail:
	- /* Count the trailing zeros using bit reversing */
	- rbit synd, synd
	- /* Compensate the last post-increment */
	- sub src, src, #32
	- /* Check that we have found a character */
	- cmp synd, #0
	- /* And count the leading zeros */
	- clz synd, synd
	- /* Compute the potential result */
	- add result, src, synd, lsr #1
	- /* Select result or NULL */
	- csel result, xzr, result, eq
	- ret
	-
	-.Lzero_length:
	- mov result, #0
	- ret
	-
	- .size memchr, . - memchr
	diff --git a/contrib/cortex-strings/src/aarch64/memcmp.S b/contrib/cortex-strings/src/aarch64/memcmp.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/memcmp.S
	+++ /dev/null
	@@ -1,162 +0,0 @@
	-/* memcmp - compare memory
	-
	- Copyright (c) 2013, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-/* Parameters and result. */
	-#define src1 x0
	-#define src2 x1
	-#define limit x2
	-#define result x0
	-
	-/* Internal variables. */
	-#define data1 x3
	-#define data1w w3
	-#define data2 x4
	-#define data2w w4
	-#define has_nul x5
	-#define diff x6
	-#define endloop x7
	-#define tmp1 x8
	-#define tmp2 x9
	-#define tmp3 x10
	-#define pos x11
	-#define limit_wd x12
	-#define mask x13
	-
	-def_fn memcmp p2align=6
	- cbz limit, .Lret0
	- eor tmp1, src1, src2
	- tst tmp1, #7
	- b.ne .Lmisaligned8
	- ands tmp1, src1, #7
	- b.ne .Lmutual_align
	- add limit_wd, limit, #7
	- lsr limit_wd, limit_wd, #3
	- /* Start of performance-critical section -- one 64B cache line. */
	-.Lloop_aligned:
	- ldr data1, [src1], #8
	- ldr data2, [src2], #8
	-.Lstart_realigned:
	- subs limit_wd, limit_wd, #1
	- eor diff, data1, data2 /* Non-zero if differences found. */
	- csinv endloop, diff, xzr, ne /* Last Dword or differences. */
	- cbz endloop, .Lloop_aligned
	- /* End of performance-critical section -- one 64B cache line. */
	-
	- /* Not reached the limit, must have found a diff. */
	- cbnz limit_wd, .Lnot_limit
	-
	- /* Limit % 8 == 0 => all bytes significant. */
	- ands limit, limit, #7
	- b.eq .Lnot_limit
	-
	- lsl limit, limit, #3 /* Bits -> bytes. */
	- mov mask, #~0
	-#ifdef __AARCH64EB__
	- lsr mask, mask, limit
	-#else
	- lsl mask, mask, limit
	-#endif
	- bic data1, data1, mask
	- bic data2, data2, mask
	-
	- orr diff, diff, mask
	-.Lnot_limit:
	-
	-#ifndef __AARCH64EB__
	- rev diff, diff
	- rev data1, data1
	- rev data2, data2
	-#endif
	- /* The MS-non-zero bit of DIFF marks either the first bit
	- that is different, or the end of the significant data.
	- Shifting left now will bring the critical information into the
	- top bits. */
	- clz pos, diff
	- lsl data1, data1, pos
	- lsl data2, data2, pos
	- /* But we need to zero-extend (char is unsigned) the value and then
	- perform a signed 32-bit subtraction. */
	- lsr data1, data1, #56
	- sub result, data1, data2, lsr #56
	- ret
	-
	-.Lmutual_align:
	- /* Sources are mutually aligned, but are not currently at an
	- alignment boundary. Round down the addresses and then mask off
	- the bytes that precede the start point. */
	- bic src1, src1, #7
	- bic src2, src2, #7
	- add limit, limit, tmp1 /* Adjust the limit for the extra. */
	- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
	- ldr data1, [src1], #8
	- neg tmp1, tmp1 /* Bits to alignment -64. */
	- ldr data2, [src2], #8
	- mov tmp2, #~0
	-#ifdef __AARCH64EB__
	- /* Big-endian. Early bytes are at MSB. */
	- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
	-#else
	- /* Little-endian. Early bytes are at LSB. */
	- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
	-#endif
	- add limit_wd, limit, #7
	- orr data1, data1, tmp2
	- orr data2, data2, tmp2
	- lsr limit_wd, limit_wd, #3
	- b .Lstart_realigned
	-
	-.Lret0:
	- mov result, #0
	- ret
	-
	- .p2align 6
	-.Lmisaligned8:
	- sub limit, limit, #1
	-1:
	- /* Perhaps we can do better than this. */
	- ldrb data1w, [src1], #1
	- ldrb data2w, [src2], #1
	- subs limit, limit, #1
	- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
	- b.eq 1b
	- sub result, data1, data2
	- ret
	- .size memcmp, . - memcmp
	diff --git a/contrib/cortex-strings/src/aarch64/memcpy.S b/contrib/cortex-strings/src/aarch64/memcpy.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/memcpy.S
	+++ /dev/null
	@@ -1,225 +0,0 @@
	-/* Copyright (c) 2012, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/*
	- * Copyright (c) 2015 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64, unaligned accesses.
	- *
	- */
	-
	-#define dstin x0
	-#define src x1
	-#define count x2
	-#define dst x3
	-#define srcend x4
	-#define dstend x5
	-#define A_l x6
	-#define A_lw w6
	-#define A_h x7
	-#define A_hw w7
	-#define B_l x8
	-#define B_lw w8
	-#define B_h x9
	-#define C_l x10
	-#define C_h x11
	-#define D_l x12
	-#define D_h x13
	-#define E_l src
	-#define E_h count
	-#define F_l dst
	-#define F_h srcend
	-#define tmp1 x9
	-
	-#define L(l) .L ## l
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-/* Copies are split into 3 main cases: small copies of up to 16 bytes,
	- medium copies of 17..96 bytes which are fully unrolled. Large copies
	- of more than 96 bytes align the destination and use an unrolled loop
	- processing 64 bytes per iteration.
	- Small and medium copies read all data before writing, allowing any
	- kind of overlap, and memmove tailcalls memcpy for these cases as
	- well as non-overlapping copies.
	-*/
	-
	-def_fn memcpy p2align=6
	- prfm PLDL1KEEP, [src]
	- add srcend, src, count
	- add dstend, dstin, count
	- cmp count, 16
	- b.ls L(copy16)
	- cmp count, 96
	- b.hi L(copy_long)
	-
	- /* Medium copies: 17..96 bytes. */
	- sub tmp1, count, 1
	- ldp A_l, A_h, [src]
	- tbnz tmp1, 6, L(copy96)
	- ldp D_l, D_h, [srcend, -16]
	- tbz tmp1, 5, 1f
	- ldp B_l, B_h, [src, 16]
	- ldp C_l, C_h, [srcend, -32]
	- stp B_l, B_h, [dstin, 16]
	- stp C_l, C_h, [dstend, -32]
	-1:
	- stp A_l, A_h, [dstin]
	- stp D_l, D_h, [dstend, -16]
	- ret
	-
	- .p2align 4
	- /* Small copies: 0..16 bytes. */
	-L(copy16):
	- cmp count, 8
	- b.lo 1f
	- ldr A_l, [src]
	- ldr A_h, [srcend, -8]
	- str A_l, [dstin]
	- str A_h, [dstend, -8]
	- ret
	- .p2align 4
	-1:
	- tbz count, 2, 1f
	- ldr A_lw, [src]
	- ldr A_hw, [srcend, -4]
	- str A_lw, [dstin]
	- str A_hw, [dstend, -4]
	- ret
	-
	- /* Copy 0..3 bytes. Use a branchless sequence that copies the same
	- byte 3 times if count==1, or the 2nd byte twice if count==2. */
	-1:
	- cbz count, 2f
	- lsr tmp1, count, 1
	- ldrb A_lw, [src]
	- ldrb A_hw, [srcend, -1]
	- ldrb B_lw, [src, tmp1]
	- strb A_lw, [dstin]
	- strb B_lw, [dstin, tmp1]
	- strb A_hw, [dstend, -1]
	-2: ret
	-
	- .p2align 4
	- /* Copy 64..96 bytes. Copy 64 bytes from the start and
	- 32 bytes from the end. */
	-L(copy96):
	- ldp B_l, B_h, [src, 16]
	- ldp C_l, C_h, [src, 32]
	- ldp D_l, D_h, [src, 48]
	- ldp E_l, E_h, [srcend, -32]
	- ldp F_l, F_h, [srcend, -16]
	- stp A_l, A_h, [dstin]
	- stp B_l, B_h, [dstin, 16]
	- stp C_l, C_h, [dstin, 32]
	- stp D_l, D_h, [dstin, 48]
	- stp E_l, E_h, [dstend, -32]
	- stp F_l, F_h, [dstend, -16]
	- ret
	-
	- /* Align DST to 16 byte alignment so that we don't cross cache line
	- boundaries on both loads and stores. There are at least 96 bytes
	- to copy, so copy 16 bytes unaligned and then align. The loop
	- copies 64 bytes per iteration and prefetches one iteration ahead. */
	-
	- .p2align 4
	-L(copy_long):
	- and tmp1, dstin, 15
	- bic dst, dstin, 15
	- ldp D_l, D_h, [src]
	- sub src, src, tmp1
	- add count, count, tmp1 /* Count is now 16 too large. */
	- ldp A_l, A_h, [src, 16]
	- stp D_l, D_h, [dstin]
	- ldp B_l, B_h, [src, 32]
	- ldp C_l, C_h, [src, 48]
	- ldp D_l, D_h, [src, 64]!
	- subs count, count, 128 + 16 /* Test and readjust count. */
	- b.ls 2f
	-1:
	- stp A_l, A_h, [dst, 16]
	- ldp A_l, A_h, [src, 16]
	- stp B_l, B_h, [dst, 32]
	- ldp B_l, B_h, [src, 32]
	- stp C_l, C_h, [dst, 48]
	- ldp C_l, C_h, [src, 48]
	- stp D_l, D_h, [dst, 64]!
	- ldp D_l, D_h, [src, 64]!
	- subs count, count, 64
	- b.hi 1b
	-
	- /* Write the last full set of 64 bytes. The remainder is at most 64
	- bytes, so it is safe to always copy 64 bytes from the end even if
	- there is just 1 byte left. */
	-2:
	- ldp E_l, E_h, [srcend, -64]
	- stp A_l, A_h, [dst, 16]
	- ldp A_l, A_h, [srcend, -48]
	- stp B_l, B_h, [dst, 32]
	- ldp B_l, B_h, [srcend, -32]
	- stp C_l, C_h, [dst, 48]
	- ldp C_l, C_h, [srcend, -16]
	- stp D_l, D_h, [dst, 64]
	- stp E_l, E_h, [dstend, -64]
	- stp A_l, A_h, [dstend, -48]
	- stp B_l, B_h, [dstend, -32]
	- stp C_l, C_h, [dstend, -16]
	- ret
	-
	- .size memcpy, . - memcpy
	diff --git a/contrib/cortex-strings/src/aarch64/memmove.S b/contrib/cortex-strings/src/aarch64/memmove.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/memmove.S
	+++ /dev/null
	@@ -1,150 +0,0 @@
	-/* Copyright (c) 2013, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/*
	- * Copyright (c) 2015 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64, unaligned accesses
	- */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-/* Parameters and result. */
	-#define dstin x0
	-#define src x1
	-#define count x2
	-#define srcend x3
	-#define dstend x4
	-#define tmp1 x5
	-#define A_l x6
	-#define A_h x7
	-#define B_l x8
	-#define B_h x9
	-#define C_l x10
	-#define C_h x11
	-#define D_l x12
	-#define D_h x13
	-#define E_l count
	-#define E_h tmp1
	-
	-/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
	- Larger backwards copies are also handled by memcpy. The only remaining
	- case is forward large copies. The destination is aligned, and an
	- unrolled loop processes 64 bytes per iteration.
	-*/
	-
	-def_fn memmove, 6
	- sub tmp1, dstin, src
	- cmp count, 96
	- ccmp tmp1, count, 2, hi
	- b.hs memcpy
	-
	- cbz tmp1, 3f
	- add dstend, dstin, count
	- add srcend, src, count
	-
	- /* Align dstend to 16 byte alignment so that we don't cross cache line
	- boundaries on both loads and stores. There are at least 96 bytes
	- to copy, so copy 16 bytes unaligned and then align. The loop
	- copies 64 bytes per iteration and prefetches one iteration ahead. */
	-
	- and tmp1, dstend, 15
	- ldp D_l, D_h, [srcend, -16]
	- sub srcend, srcend, tmp1
	- sub count, count, tmp1
	- ldp A_l, A_h, [srcend, -16]
	- stp D_l, D_h, [dstend, -16]
	- ldp B_l, B_h, [srcend, -32]
	- ldp C_l, C_h, [srcend, -48]
	- ldp D_l, D_h, [srcend, -64]!
	- sub dstend, dstend, tmp1
	- subs count, count, 128
	- b.ls 2f
	- nop
	-1:
	- stp A_l, A_h, [dstend, -16]
	- ldp A_l, A_h, [srcend, -16]
	- stp B_l, B_h, [dstend, -32]
	- ldp B_l, B_h, [srcend, -32]
	- stp C_l, C_h, [dstend, -48]
	- ldp C_l, C_h, [srcend, -48]
	- stp D_l, D_h, [dstend, -64]!
	- ldp D_l, D_h, [srcend, -64]!
	- subs count, count, 64
	- b.hi 1b
	-
	- /* Write the last full set of 64 bytes. The remainder is at most 64
	- bytes, so it is safe to always copy 64 bytes from the start even if
	- there is just 1 byte left. */
	-2:
	- ldp E_l, E_h, [src, 48]
	- stp A_l, A_h, [dstend, -16]
	- ldp A_l, A_h, [src, 32]
	- stp B_l, B_h, [dstend, -32]
	- ldp B_l, B_h, [src, 16]
	- stp C_l, C_h, [dstend, -48]
	- ldp C_l, C_h, [src]
	- stp D_l, D_h, [dstend, -64]
	- stp E_l, E_h, [dstin, 48]
	- stp A_l, A_h, [dstin, 32]
	- stp B_l, B_h, [dstin, 16]
	- stp C_l, C_h, [dstin]
	-3: ret
	-
	- .size memmove, . - memmove
	diff --git a/contrib/cortex-strings/src/aarch64/memset.S b/contrib/cortex-strings/src/aarch64/memset.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/memset.S
	+++ /dev/null
	@@ -1,235 +0,0 @@
	-/* Copyright (c) 2012, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/*
	- * Copyright (c) 2015 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64, unaligned accesses
	- *
	- */
	-
	-
	-#define dstin x0
	-#define val x1
	-#define valw w1
	-#define count x2
	-#define dst x3
	-#define dstend x4
	-#define tmp1 x5
	-#define tmp1w w5
	-#define tmp2 x6
	-#define tmp2w w6
	-#define zva_len x7
	-#define zva_lenw w7
	-
	-#define L(l) .L ## l
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-def_fn memset p2align=6
	-
	- dup v0.16B, valw
	- add dstend, dstin, count
	-
	- cmp count, 96
	- b.hi L(set_long)
	- cmp count, 16
	- b.hs L(set_medium)
	- mov val, v0.D[0]
	-
	- /* Set 0..15 bytes. */
	- tbz count, 3, 1f
	- str val, [dstin]
	- str val, [dstend, -8]
	- ret
	- nop
	-1: tbz count, 2, 2f
	- str valw, [dstin]
	- str valw, [dstend, -4]
	- ret
	-2: cbz count, 3f
	- strb valw, [dstin]
	- tbz count, 1, 3f
	- strh valw, [dstend, -2]
	-3: ret
	-
	- /* Set 17..96 bytes. */
	-L(set_medium):
	- str q0, [dstin]
	- tbnz count, 6, L(set96)
	- str q0, [dstend, -16]
	- tbz count, 5, 1f
	- str q0, [dstin, 16]
	- str q0, [dstend, -32]
	-1: ret
	-
	- .p2align 4
	- /* Set 64..96 bytes. Write 64 bytes from the start and
	- 32 bytes from the end. */
	-L(set96):
	- str q0, [dstin, 16]
	- stp q0, q0, [dstin, 32]
	- stp q0, q0, [dstend, -32]
	- ret
	-
	- .p2align 3
	- nop
	-L(set_long):
	- and valw, valw, 255
	- bic dst, dstin, 15
	- str q0, [dstin]
	- cmp count, 256
	- ccmp valw, 0, 0, cs
	- b.eq L(try_zva)
	-L(no_zva):
	- sub count, dstend, dst /* Count is 16 too large. */
	- add dst, dst, 16
	- sub count, count, 64 + 16 /* Adjust count and bias for loop. */
	-1: stp q0, q0, [dst], 64
	- stp q0, q0, [dst, -32]
	-L(tail64):
	- subs count, count, 64
	- b.hi 1b
	-2: stp q0, q0, [dstend, -64]
	- stp q0, q0, [dstend, -32]
	- ret
	-
	- .p2align 3
	-L(try_zva):
	- mrs tmp1, dczid_el0
	- tbnz tmp1w, 4, L(no_zva)
	- and tmp1w, tmp1w, 15
	- cmp tmp1w, 4 /* ZVA size is 64 bytes. */
	- b.ne L(zva_128)
	-
	- /* Write the first and last 64 byte aligned block using stp rather
	- than using DC ZVA. This is faster on some cores.
	- */
	-L(zva_64):
	- str q0, [dst, 16]
	- stp q0, q0, [dst, 32]
	- bic dst, dst, 63
	- stp q0, q0, [dst, 64]
	- stp q0, q0, [dst, 96]
	- sub count, dstend, dst /* Count is now 128 too large. */
	- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
	- add dst, dst, 128
	- nop
	-1: dc zva, dst
	- add dst, dst, 64
	- subs count, count, 64
	- b.hi 1b
	- stp q0, q0, [dst, 0]
	- stp q0, q0, [dst, 32]
	- stp q0, q0, [dstend, -64]
	- stp q0, q0, [dstend, -32]
	- ret
	-
	- .p2align 3
	-L(zva_128):
	- cmp tmp1w, 5 /* ZVA size is 128 bytes. */
	- b.ne L(zva_other)
	-
	- str q0, [dst, 16]
	- stp q0, q0, [dst, 32]
	- stp q0, q0, [dst, 64]
	- stp q0, q0, [dst, 96]
	- bic dst, dst, 127
	- sub count, dstend, dst /* Count is now 128 too large. */
	- sub count, count, 128+128 /* Adjust count and bias for loop. */
	- add dst, dst, 128
	-1: dc zva, dst
	- add dst, dst, 128
	- subs count, count, 128
	- b.hi 1b
	- stp q0, q0, [dstend, -128]
	- stp q0, q0, [dstend, -96]
	- stp q0, q0, [dstend, -64]
	- stp q0, q0, [dstend, -32]
	- ret
	-
	-L(zva_other):
	- mov tmp2w, 4
	- lsl zva_lenw, tmp2w, tmp1w
	- add tmp1, zva_len, 64 /* Max alignment bytes written. */
	- cmp count, tmp1
	- blo L(no_zva)
	-
	- sub tmp2, zva_len, 1
	- add tmp1, dst, zva_len
	- add dst, dst, 16
	- subs count, tmp1, dst /* Actual alignment bytes to write. */
	- bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
	- beq 2f
	-1: stp q0, q0, [dst], 64
	- stp q0, q0, [dst, -32]
	- subs count, count, 64
	- b.hi 1b
	-2: mov dst, tmp1
	- sub count, dstend, tmp1 /* Remaining bytes to write. */
	- subs count, count, zva_len
	- b.lo 4f
	-3: dc zva, dst
	- add dst, dst, zva_len
	- subs count, count, zva_len
	- b.hs 3b
	-4: add count, count, zva_len
	- b L(tail64)
	-
	- .size memset, . - memset
	diff --git a/contrib/cortex-strings/src/aarch64/strchr.S b/contrib/cortex-strings/src/aarch64/strchr.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strchr.S
	+++ /dev/null
	@@ -1,165 +0,0 @@
	-/*
	- strchr - find a character in a string
	-
	- Copyright (c) 2014, ARM Limited
	- All rights Reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the company nor the names of its contributors
	- may be used to endorse or promote products derived from this
	- software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- * Neon Available.
	- */
	-
	-/* Arguments and results. */
	-#define srcin x0
	-#define chrin w1
	-
	-#define result x0
	-
	-#define src x2
	-#define tmp1 x3
	-#define wtmp2 w4
	-#define tmp3 x5
	-
	-#define vrepchr v0
	-#define vdata1 v1
	-#define vdata2 v2
	-#define vhas_nul1 v3
	-#define vhas_nul2 v4
	-#define vhas_chr1 v5
	-#define vhas_chr2 v6
	-#define vrepmask_0 v7
	-#define vrepmask_c v16
	-#define vend1 v17
	-#define vend2 v18
	-
	-/* Core algorithm.
	-
	- For each 32-byte hunk we calculate a 64-bit syndrome value, with
	- two bits per byte (LSB is always in bits 0 and 1, for both big
	- and little-endian systems). For each tuple, bit 0 is set iff
	- the relevant byte matched the requested character; bit 1 is set
	- iff the relevant byte matched the NUL end of string (we trigger
	- off bit0 for the special case of looking for NUL). Since the bits
	- in the syndrome reflect exactly the order in which things occur
	- in the original string a count_trailing_zeros() operation will
	- identify exactly which byte is causing the termination, and why. */
	-
	-/* Locals and temporaries. */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	- .macro def_alias f a
	- .weak \a
	- .set \a,\f
	- .endm
	-
	-def_fn strchr
	-def_alias strchr index
	- /* Magic constant 0x40100401 to allow us to identify which lane
	- matches the requested byte. Magic constant 0x80200802 used
	- similarly for NUL termination. */
	- mov wtmp2, #0x0401
	- movk wtmp2, #0x4010, lsl #16
	- dup vrepchr.16b, chrin
	- bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
	- dup vrepmask_c.4s, wtmp2
	- ands tmp1, srcin, #31
	- add vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	- b.eq .Lloop
	-
	- /* Input string is not 32-byte aligned. Rather than forcing
	- the padding bytes to a safe value, we calculate the syndrome
	- for all the bytes, but then mask off those bits of the
	- syndrome that are related to the padding. */
	- ld1 {vdata1.16b, vdata2.16b}, [src], #32
	- neg tmp1, tmp1
	- cmeq vhas_nul1.16b, vdata1.16b, #0
	- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
	- cmeq vhas_nul2.16b, vdata2.16b, #0
	- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
	- and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	- and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	- and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	- and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	- lsl tmp1, tmp1, #1
	- addp vend1.16b, vend1.16b, vend2.16b // 256->128
	- mov tmp3, #~0
	- addp vend1.16b, vend1.16b, vend2.16b // 128->64
	- lsr tmp1, tmp3, tmp1
	-
	- mov tmp3, vend1.d[0]
	- bic tmp1, tmp3, tmp1 // Mask padding bits.
	- cbnz tmp1, .Ltail
	-
	-.Lloop:
	- ld1 {vdata1.16b, vdata2.16b}, [src], #32
	- cmeq vhas_nul1.16b, vdata1.16b, #0
	- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
	- cmeq vhas_nul2.16b, vdata2.16b, #0
	- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
	- /* Use a fast check for the termination condition. */
	- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	- orr vend1.16b, vend1.16b, vend2.16b
	- addp vend1.2d, vend1.2d, vend1.2d
	- mov tmp1, vend1.d[0]
	- cbz tmp1, .Lloop
	-
	- /* Termination condition found. Now need to establish exactly why
	- we terminated. */
	- and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	- and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	- and vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	- and vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	- orr vend1.16b, vhas_nul1.16b, vhas_chr1.16b
	- orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
	- addp vend1.16b, vend1.16b, vend2.16b // 256->128
	- addp vend1.16b, vend1.16b, vend2.16b // 128->64
	-
	- mov tmp1, vend1.d[0]
	-.Ltail:
	- /* Count the trailing zeros, by bit reversing... */
	- rbit tmp1, tmp1
	- /* Re-bias source. */
	- sub src, src, #32
	- clz tmp1, tmp1 /* And counting the leading zeros. */
	- /* Tmp1 is even if the target charager was found first. Otherwise
	- we've found the end of string and we weren't looking for NUL. */
	- tst tmp1, #1
	- add result, src, tmp1, lsr #1
	- csel result, result, xzr, eq
	- ret
	-
	- .size strchr, . - strchr
	diff --git a/contrib/cortex-strings/src/aarch64/strchrnul.S b/contrib/cortex-strings/src/aarch64/strchrnul.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strchrnul.S
	+++ /dev/null
	@@ -1,144 +0,0 @@
	-/*
	- strchrnul - find a character or nul in a string
	-
	- Copyright (c) 2014, ARM Limited
	- All rights Reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the company nor the names of its contributors
	- may be used to endorse or promote products derived from this
	- software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- * Neon Available.
	- */
	-
	-/* Arguments and results. */
	-#define srcin x0
	-#define chrin w1
	-
	-#define result x0
	-
	-#define src x2
	-#define tmp1 x3
	-#define wtmp2 w4
	-#define tmp3 x5
	-
	-#define vrepchr v0
	-#define vdata1 v1
	-#define vdata2 v2
	-#define vhas_nul1 v3
	-#define vhas_nul2 v4
	-#define vhas_chr1 v5
	-#define vhas_chr2 v6
	-#define vrepmask v7
	-#define vend1 v16
	-
	-/* Core algorithm.
	-
	- For each 32-byte hunk we calculate a 64-bit syndrome value, with
	- two bits per byte (LSB is always in bits 0 and 1, for both big
	- and little-endian systems). For each tuple, bit 0 is set iff
	- the relevant byte matched the requested character or nul. Since the
	- bits in the syndrome reflect exactly the order in which things occur
	- in the original string a count_trailing_zeros() operation will
	- identify exactly which byte is causing the termination. */
	-
	-/* Locals and temporaries. */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-def_fn strchrnul
	- /* Magic constant 0x40100401 to allow us to identify which lane
	- matches the termination condition. */
	- mov wtmp2, #0x0401
	- movk wtmp2, #0x4010, lsl #16
	- dup vrepchr.16b, chrin
	- bic src, srcin, #31 /* Work with aligned 32-byte hunks. */
	- dup vrepmask.4s, wtmp2
	- ands tmp1, srcin, #31
	- b.eq .Lloop
	-
	- /* Input string is not 32-byte aligned. Rather than forcing
	- the padding bytes to a safe value, we calculate the syndrome
	- for all the bytes, but then mask off those bits of the
	- syndrome that are related to the padding. */
	- ld1 {vdata1.16b, vdata2.16b}, [src], #32
	- neg tmp1, tmp1
	- cmeq vhas_nul1.16b, vdata1.16b, #0
	- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
	- cmeq vhas_nul2.16b, vdata2.16b, #0
	- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
	- orr vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
	- orr vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
	- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	- lsl tmp1, tmp1, #1
	- addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
	- mov tmp3, #~0
	- addp vend1.16b, vend1.16b, vend1.16b // 128->64
	- lsr tmp1, tmp3, tmp1
	-
	- mov tmp3, vend1.d[0]
	- bic tmp1, tmp3, tmp1 // Mask padding bits.
	- cbnz tmp1, .Ltail
	-
	-.Lloop:
	- ld1 {vdata1.16b, vdata2.16b}, [src], #32
	- cmeq vhas_nul1.16b, vdata1.16b, #0
	- cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
	- cmeq vhas_nul2.16b, vdata2.16b, #0
	- cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
	- /* Use a fast check for the termination condition. */
	- orr vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
	- orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
	- orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
	- addp vend1.2d, vend1.2d, vend1.2d
	- mov tmp1, vend1.d[0]
	- cbz tmp1, .Lloop
	-
	- /* Termination condition found. Now need to establish exactly why
	- we terminated. */
	- and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	- and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	- addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
	- addp vend1.16b, vend1.16b, vend1.16b // 128->64
	-
	- mov tmp1, vend1.d[0]
	-.Ltail:
	- /* Count the trailing zeros, by bit reversing... */
	- rbit tmp1, tmp1
	- /* Re-bias source. */
	- sub src, src, #32
	- clz tmp1, tmp1 /* ... and counting the leading zeros. */
	- /* tmp1 is twice the offset into the fragment. */
	- add result, src, tmp1, lsr #1
	- ret
	-
	- .size strchrnul, . - strchrnul
	diff --git a/contrib/cortex-strings/src/aarch64/strcmp.S b/contrib/cortex-strings/src/aarch64/strcmp.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strcmp.S
	+++ /dev/null
	@@ -1,166 +0,0 @@
	-/* Copyright (c) 2012, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-#define REP8_01 0x0101010101010101
	-#define REP8_7f 0x7f7f7f7f7f7f7f7f
	-#define REP8_80 0x8080808080808080
	-
	-/* Parameters and result. */
	-#define src1 x0
	-#define src2 x1
	-#define result x0
	-
	-/* Internal variables. */
	-#define data1 x2
	-#define data1w w2
	-#define data2 x3
	-#define data2w w3
	-#define has_nul x4
	-#define diff x5
	-#define syndrome x6
	-#define tmp1 x7
	-#define tmp2 x8
	-#define tmp3 x9
	-#define zeroones x10
	-#define pos x11
	-
	- /* Start of performance-critical section -- one 64B cache line. */
	-def_fn strcmp p2align=6
	- eor tmp1, src1, src2
	- mov zeroones, #REP8_01
	- tst tmp1, #7
	- b.ne .Lmisaligned8
	- ands tmp1, src1, #7
	- b.ne .Lmutual_align
	- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	- can be done in parallel across the entire word. */
	-.Lloop_aligned:
	- ldr data1, [src1], #8
	- ldr data2, [src2], #8
	-.Lstart_realigned:
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- eor diff, data1, data2 /* Non-zero if differences found. */
	- bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
	- orr syndrome, diff, has_nul
	- cbz syndrome, .Lloop_aligned
	- /* End of performance-critical section -- one 64B cache line. */
	-
	-#ifndef __AARCH64EB__
	- rev syndrome, syndrome
	- rev data1, data1
	- /* The MS-non-zero bit of the syndrome marks either the first bit
	- that is different, or the top bit of the first zero byte.
	- Shifting left now will bring the critical information into the
	- top bits. */
	- clz pos, syndrome
	- rev data2, data2
	- lsl data1, data1, pos
	- lsl data2, data2, pos
	- /* But we need to zero-extend (char is unsigned) the value and then
	- perform a signed 32-bit subtraction. */
	- lsr data1, data1, #56
	- sub result, data1, data2, lsr #56
	- ret
	-#else
	- /* For big-endian we cannot use the trick with the syndrome value
	- as carry-propagation can corrupt the upper bits if the trailing
	- bytes in the string contain 0x01. */
	- /* However, if there is no NUL byte in the dword, we can generate
	- the result directly. We can't just subtract the bytes as the
	- MSB might be significant. */
	- cbnz has_nul, 1f
	- cmp data1, data2
	- cset result, ne
	- cneg result, result, lo
	- ret
	-1:
	- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
	- rev tmp3, data1
	- sub tmp1, tmp3, zeroones
	- orr tmp2, tmp3, #REP8_7f
	- bic has_nul, tmp1, tmp2
	- rev has_nul, has_nul
	- orr syndrome, diff, has_nul
	- clz pos, syndrome
	- /* The MS-non-zero bit of the syndrome marks either the first bit
	- that is different, or the top bit of the first zero byte.
	- Shifting left now will bring the critical information into the
	- top bits. */
	- lsl data1, data1, pos
	- lsl data2, data2, pos
	- /* But we need to zero-extend (char is unsigned) the value and then
	- perform a signed 32-bit subtraction. */
	- lsr data1, data1, #56
	- sub result, data1, data2, lsr #56
	- ret
	-#endif
	-
	-.Lmutual_align:
	- /* Sources are mutually aligned, but are not currently at an
	- alignment boundary. Round down the addresses and then mask off
	- the bytes that preceed the start point. */
	- bic src1, src1, #7
	- bic src2, src2, #7
	- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
	- ldr data1, [src1], #8
	- neg tmp1, tmp1 /* Bits to alignment -64. */
	- ldr data2, [src2], #8
	- mov tmp2, #~0
	-#ifdef __AARCH64EB__
	- /* Big-endian. Early bytes are at MSB. */
	- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
	-#else
	- /* Little-endian. Early bytes are at LSB. */
	- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
	-#endif
	- orr data1, data1, tmp2
	- orr data2, data2, tmp2
	- b .Lstart_realigned
	-
	-.Lmisaligned8:
	- /* We can do better than this. */
	- ldrb data1w, [src1], #1
	- ldrb data2w, [src2], #1
	- cmp data1w, #1
	- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
	- b.eq .Lmisaligned8
	- sub result, data1, data2
	- ret
	diff --git a/contrib/cortex-strings/src/aarch64/strcpy.S b/contrib/cortex-strings/src/aarch64/strcpy.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strcpy.S
	+++ /dev/null
	@@ -1,336 +0,0 @@
	-/*
	- strcpy/stpcpy - copy a string returning pointer to start/end.
	-
	- Copyright (c) 2013, 2014, 2015 ARM Ltd.
	- All Rights Reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the company nor the names of its contributors
	- may be used to endorse or promote products derived from this
	- software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
	- */
	-
	-/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
	-
	- To test the page crossing code path more thoroughly, compile with
	- -DSTRCPY_TEST_PAGE_CROSS - this will force all copies through the slower
	- entry path. This option is not intended for production use. */
	-
	-/* Arguments and results. */
	-#define dstin x0
	-#define srcin x1
	-
	-/* Locals and temporaries. */
	-#define src x2
	-#define dst x3
	-#define data1 x4
	-#define data1w w4
	-#define data2 x5
	-#define data2w w5
	-#define has_nul1 x6
	-#define has_nul2 x7
	-#define tmp1 x8
	-#define tmp2 x9
	-#define tmp3 x10
	-#define tmp4 x11
	-#define zeroones x12
	-#define data1a x13
	-#define data2a x14
	-#define pos x15
	-#define len x16
	-#define to_align x17
	-
	-#ifdef BUILD_STPCPY
	-#define STRCPY stpcpy
	-#else
	-#define STRCPY strcpy
	-#endif
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	- can be done in parallel across the entire word. */
	-
	-#define REP8_01 0x0101010101010101
	-#define REP8_7f 0x7f7f7f7f7f7f7f7f
	-#define REP8_80 0x8080808080808080
	-
	- /* AArch64 systems have a minimum page size of 4k. We can do a quick
	- page size check for crossing this boundary on entry and if we
	- do not, then we can short-circuit much of the entry code. We
	- expect early page-crossing strings to be rare (probability of
	- 16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
	- predictable, even with random strings.
	-
	- We don't bother checking for larger page sizes, the cost of setting
	- up the correct page size is just not worth the extra gain from
	- a small reduction in the cases taking the slow path. Note that
	- we only care about whether the first fetch, which may be
	- misaligned, crosses a page boundary - after that we move to aligned
	- fetches for the remainder of the string. */
	-
	-#ifdef STRCPY_TEST_PAGE_CROSS
	- /* Make everything that isn't Qword aligned look like a page cross. */
	-#define MIN_PAGE_P2 4
	-#else
	-#define MIN_PAGE_P2 12
	-#endif
	-
	-#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
	-
	-def_fn STRCPY p2align=6
	- /* For moderately short strings, the fastest way to do the copy is to
	- calculate the length of the string in the same way as strlen, then
	- essentially do a memcpy of the result. This avoids the need for
	- multiple byte copies and further means that by the time we
	- reach the bulk copy loop we know we can always use DWord
	- accesses. We expect strcpy to rarely be called repeatedly
	- with the same source string, so branch prediction is likely to
	- always be difficult - we mitigate against this by preferring
	- conditional select operations over branches whenever this is
	- feasible. */
	- and tmp2, srcin, #(MIN_PAGE_SIZE - 1)
	- mov zeroones, #REP8_01
	- and to_align, srcin, #15
	- cmp tmp2, #(MIN_PAGE_SIZE - 16)
	- neg tmp1, to_align
	- /* The first fetch will straddle a (possible) page boundary iff
	- srcin + 15 causes bit[MIN_PAGE_P2] to change value. A 16-byte
	- aligned string will never fail the page align check, so will
	- always take the fast path. */
	- b.gt .Lpage_cross
	-
	-.Lpage_cross_ok:
	- ldp data1, data2, [srcin]
	-#ifdef __AARCH64EB__
	- /* Because we expect the end to be found within 16 characters
	- (profiling shows this is the most common case), it's worth
	- swapping the bytes now to save having to recalculate the
	- termination syndrome later. We preserve data1 and data2
	- so that we can re-use the values later on. */
	- rev tmp2, data1
	- sub tmp1, tmp2, zeroones
	- orr tmp2, tmp2, #REP8_7f
	- bics has_nul1, tmp1, tmp2
	- b.ne .Lfp_le8
	- rev tmp4, data2
	- sub tmp3, tmp4, zeroones
	- orr tmp4, tmp4, #REP8_7f
	-#else
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- bics has_nul1, tmp1, tmp2
	- b.ne .Lfp_le8
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, #REP8_7f
	-#endif
	- bics has_nul2, tmp3, tmp4
	- b.eq .Lbulk_entry
	-
	- /* The string is short (<=16 bytes). We don't know exactly how
	- short though, yet. Work out the exact length so that we can
	- quickly select the optimal copy strategy. */
	-.Lfp_gt8:
	- rev has_nul2, has_nul2
	- clz pos, has_nul2
	- mov tmp2, #56
	- add dst, dstin, pos, lsr #3 /* Bits to bytes. */
	- sub pos, tmp2, pos
	-#ifdef __AARCH64EB__
	- lsr data2, data2, pos
	-#else
	- lsl data2, data2, pos
	-#endif
	- str data2, [dst, #1]
	- str data1, [dstin]
	-#ifdef BUILD_STPCPY
	- add dstin, dst, #8
	-#endif
	- ret
	-
	-.Lfp_le8:
	- rev has_nul1, has_nul1
	- clz pos, has_nul1
	- add dst, dstin, pos, lsr #3 /* Bits to bytes. */
	- subs tmp2, pos, #24 /* Pos in bits. */
	- b.lt .Lfp_lt4
	-#ifdef __AARCH64EB__
	- mov tmp2, #56
	- sub pos, tmp2, pos
	- lsr data2, data1, pos
	- lsr data1, data1, #32
	-#else
	- lsr data2, data1, tmp2
	-#endif
	- /* 4->7 bytes to copy. */
	- str data2w, [dst, #-3]
	- str data1w, [dstin]
	-#ifdef BUILD_STPCPY
	- mov dstin, dst
	-#endif
	- ret
	-.Lfp_lt4:
	- cbz pos, .Lfp_lt2
	- /* 2->3 bytes to copy. */
	-#ifdef __AARCH64EB__
	- lsr data1, data1, #48
	-#endif
	- strh data1w, [dstin]
	- /* Fall-through, one byte (max) to go. */
	-.Lfp_lt2:
	- /* Null-terminated string. Last character must be zero! */
	- strb wzr, [dst]
	-#ifdef BUILD_STPCPY
	- mov dstin, dst
	-#endif
	- ret
	-
	- .p2align 6
	- /* Aligning here ensures that the entry code and main loop all lies
	- within one 64-byte cache line. */
	-.Lbulk_entry:
	- sub to_align, to_align, #16
	- stp data1, data2, [dstin]
	- sub src, srcin, to_align
	- sub dst, dstin, to_align
	- b .Lentry_no_page_cross
	-
	- /* The inner loop deals with two Dwords at a time. This has a
	- slightly higher start-up cost, but we should win quite quickly,
	- especially on cores with a high number of issue slots per
	- cycle, as we get much better parallelism out of the operations. */
	-.Lmain_loop:
	- stp data1, data2, [dst], #16
	-.Lentry_no_page_cross:
	- ldp data1, data2, [src], #16
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, #REP8_7f
	- bic has_nul1, tmp1, tmp2
	- bics has_nul2, tmp3, tmp4
	- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
	- b.eq .Lmain_loop
	-
	- /* Since we know we are copying at least 16 bytes, the fastest way
	- to deal with the tail is to determine the location of the
	- trailing NUL, then (re)copy the 16 bytes leading up to that. */
	- cmp has_nul1, #0
	-#ifdef __AARCH64EB__
	- /* For big-endian, carry propagation (if the final byte in the
	- string is 0x01) means we cannot use has_nul directly. The
	- easiest way to get the correct byte is to byte-swap the data
	- and calculate the syndrome a second time. */
	- csel data1, data1, data2, ne
	- rev data1, data1
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- bic has_nul1, tmp1, tmp2
	-#else
	- csel has_nul1, has_nul1, has_nul2, ne
	-#endif
	- rev has_nul1, has_nul1
	- clz pos, has_nul1
	- add tmp1, pos, #72
	- add pos, pos, #8
	- csel pos, pos, tmp1, ne
	- add src, src, pos, lsr #3
	- add dst, dst, pos, lsr #3
	- ldp data1, data2, [src, #-32]
	- stp data1, data2, [dst, #-16]
	-#ifdef BUILD_STPCPY
	- sub dstin, dst, #1
	-#endif
	- ret
	-
	-.Lpage_cross:
	- bic src, srcin, #15
	- /* Start by loading two words at [srcin & ~15], then forcing the
	- bytes that precede srcin to 0xff. This means they never look
	- like termination bytes. */
	- ldp data1, data2, [src]
	- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
	- tst to_align, #7
	- csetm tmp2, ne
	-#ifdef __AARCH64EB__
	- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
	-#else
	- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
	-#endif
	- orr data1, data1, tmp2
	- orr data2a, data2, tmp2
	- cmp to_align, #8
	- csinv data1, data1, xzr, lt
	- csel data2, data2, data2a, lt
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, #REP8_7f
	- bic has_nul1, tmp1, tmp2
	- bics has_nul2, tmp3, tmp4
	- ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */
	- b.eq .Lpage_cross_ok
	- /* We now need to make data1 and data2 look like they've been
	- loaded directly from srcin. Do a rotate on the 128-bit value. */
	- lsl tmp1, to_align, #3 /* Bytes->bits. */
	- neg tmp2, to_align, lsl #3
	-#ifdef __AARCH64EB__
	- lsl data1a, data1, tmp1
	- lsr tmp4, data2, tmp2
	- lsl data2, data2, tmp1
	- orr tmp4, tmp4, data1a
	- cmp to_align, #8
	- csel data1, tmp4, data2, lt
	- rev tmp2, data1
	- rev tmp4, data2
	- sub tmp1, tmp2, zeroones
	- orr tmp2, tmp2, #REP8_7f
	- sub tmp3, tmp4, zeroones
	- orr tmp4, tmp4, #REP8_7f
	-#else
	- lsr data1a, data1, tmp1
	- lsl tmp4, data2, tmp2
	- lsr data2, data2, tmp1
	- orr tmp4, tmp4, data1a
	- cmp to_align, #8
	- csel data1, tmp4, data2, lt
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, #REP8_7f
	-#endif
	- bic has_nul1, tmp1, tmp2
	- cbnz has_nul1, .Lfp_le8
	- bic has_nul2, tmp3, tmp4
	- b .Lfp_gt8
	-
	- .size STRCPY, . - STRCPY
	diff --git a/contrib/cortex-strings/src/aarch64/strlen.S b/contrib/cortex-strings/src/aarch64/strlen.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strlen.S
	+++ /dev/null
	@@ -1,233 +0,0 @@
	-/* Copyright (c) 2013-2015, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
	- */
	-
	-/* To test the page crossing code path more thoroughly, compile with
	- -DTEST_PAGE_CROSS - this will force all calls through the slower
	- entry path. This option is not intended for production use. */
	-
	-/* Arguments and results. */
	-#define srcin x0
	-#define len x0
	-
	-/* Locals and temporaries. */
	-#define src x1
	-#define data1 x2
	-#define data2 x3
	-#define has_nul1 x4
	-#define has_nul2 x5
	-#define tmp1 x4
	-#define tmp2 x5
	-#define tmp3 x6
	-#define tmp4 x7
	-#define zeroones x8
	-
	-#define L(l) .L ## l
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	- (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	- can be done in parallel across the entire word. A faster check
	- (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
	- false hits for characters 129..255. */
	-
	-#define REP8_01 0x0101010101010101
	-#define REP8_7f 0x7f7f7f7f7f7f7f7f
	-#define REP8_80 0x8080808080808080
	-
	-#ifdef TEST_PAGE_CROSS
	-# define MIN_PAGE_SIZE 15
	-#else
	-# define MIN_PAGE_SIZE 4096
	-#endif
	-
	- /* Since strings are short on average, we check the first 16 bytes
	- of the string for a NUL character. In order to do an unaligned ldp
	- safely we have to do a page cross check first. If there is a NUL
	- byte we calculate the length from the 2 8-byte words using
	- conditional select to reduce branch mispredictions (it is unlikely
	- strlen will be repeatedly called on strings with the same length).
	-
	- If the string is longer than 16 bytes, we align src so don't need
	- further page cross checks, and process 32 bytes per iteration
	- using the fast NUL check. If we encounter non-ASCII characters,
	- fallback to a second loop using the full NUL check.
	-
	- If the page cross check fails, we read 16 bytes from an aligned
	- address, remove any characters before the string, and continue
	- in the main loop using aligned loads. Since strings crossing a
	- page in the first 16 bytes are rare (probability of
	- 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
	-
	- AArch64 systems have a minimum page size of 4k. We don't bother
	- checking for larger page sizes - the cost of setting up the correct
	- page size is just not worth the extra gain from a small reduction in
	- the cases taking the slow path. Note that we only care about
	- whether the first fetch, which may be misaligned, crosses a page
	- boundary. */
	-
	-def_fn strlen p2align=6
	- and tmp1, srcin, MIN_PAGE_SIZE - 1
	- mov zeroones, REP8_01
	- cmp tmp1, MIN_PAGE_SIZE - 16
	- b.gt L(page_cross)
	- ldp data1, data2, [srcin]
	-#ifdef __AARCH64EB__
	- /* For big-endian, carry propagation (if the final byte in the
	- string is 0x01) means we cannot use has_nul1/2 directly.
	- Since we expect strings to be small and early-exit,
	- byte-swap the data now so has_null1/2 will be correct. */
	- rev data1, data1
	- rev data2, data2
	-#endif
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, REP8_7f
	- bics has_nul1, tmp1, tmp2
	- bic has_nul2, tmp3, tmp4
	- ccmp has_nul2, 0, 0, eq
	- beq L(main_loop_entry)
	-
	- /* Enter with C = has_nul1 == 0. */
	- csel has_nul1, has_nul1, has_nul2, cc
	- mov len, 8
	- rev has_nul1, has_nul1
	- clz tmp1, has_nul1
	- csel len, xzr, len, cc
	- add len, len, tmp1, lsr 3
	- ret
	-
	- /* The inner loop processes 32 bytes per iteration and uses the fast
	- NUL check. If we encounter non-ASCII characters, use a second
	- loop with the accurate NUL check. */
	- .p2align 4
	-L(main_loop_entry):
	- bic src, srcin, 15
	- sub src, src, 16
	-L(main_loop):
	- ldp data1, data2, [src, 32]!
	-.Lpage_cross_entry:
	- sub tmp1, data1, zeroones
	- sub tmp3, data2, zeroones
	- orr tmp2, tmp1, tmp3
	- tst tmp2, zeroones, lsl 7
	- bne 1f
	- ldp data1, data2, [src, 16]
	- sub tmp1, data1, zeroones
	- sub tmp3, data2, zeroones
	- orr tmp2, tmp1, tmp3
	- tst tmp2, zeroones, lsl 7
	- beq L(main_loop)
	- add src, src, 16
	-1:
	- /* The fast check failed, so do the slower, accurate NUL check. */
	- orr tmp2, data1, REP8_7f
	- orr tmp4, data2, REP8_7f
	- bics has_nul1, tmp1, tmp2
	- bic has_nul2, tmp3, tmp4
	- ccmp has_nul2, 0, 0, eq
	- beq L(nonascii_loop)
	-
	- /* Enter with C = has_nul1 == 0. */
	-L(tail):
	-#ifdef __AARCH64EB__
	- /* For big-endian, carry propagation (if the final byte in the
	- string is 0x01) means we cannot use has_nul1/2 directly. The
	- easiest way to get the correct byte is to byte-swap the data
	- and calculate the syndrome a second time. */
	- csel data1, data1, data2, cc
	- rev data1, data1
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, REP8_7f
	- bic has_nul1, tmp1, tmp2
	-#else
	- csel has_nul1, has_nul1, has_nul2, cc
	-#endif
	- sub len, src, srcin
	- rev has_nul1, has_nul1
	- add tmp2, len, 8
	- clz tmp1, has_nul1
	- csel len, len, tmp2, cc
	- add len, len, tmp1, lsr 3
	- ret
	-
	-L(nonascii_loop):
	- ldp data1, data2, [src, 16]!
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, REP8_7f
	- bics has_nul1, tmp1, tmp2
	- bic has_nul2, tmp3, tmp4
	- ccmp has_nul2, 0, 0, eq
	- bne L(tail)
	- ldp data1, data2, [src, 16]!
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, REP8_7f
	- bics has_nul1, tmp1, tmp2
	- bic has_nul2, tmp3, tmp4
	- ccmp has_nul2, 0, 0, eq
	- beq L(nonascii_loop)
	- b L(tail)
	-
	- /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
	- srcin to 0x7f, so we ignore any NUL bytes before the string.
	- Then continue in the aligned loop. */
	-L(page_cross):
	- bic src, srcin, 15
	- ldp data1, data2, [src]
	- lsl tmp1, srcin, 3
	- mov tmp4, -1
	-#ifdef __AARCH64EB__
	- /* Big-endian. Early bytes are at MSB. */
	- lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
	-#else
	- /* Little-endian. Early bytes are at LSB. */
	- lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */
	-#endif
	- orr tmp1, tmp1, REP8_80
	- orn data1, data1, tmp1
	- orn tmp2, data2, tmp1
	- tst srcin, 8
	- csel data1, data1, tmp4, eq
	- csel data2, data2, tmp2, eq
	- b L(page_cross_entry)
	-
	- .size strlen, . - strlen
	diff --git a/contrib/cortex-strings/src/aarch64/strncmp.S b/contrib/cortex-strings/src/aarch64/strncmp.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strncmp.S
	+++ /dev/null
	@@ -1,222 +0,0 @@
	-/* Copyright (c) 2013, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- */
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-#define REP8_01 0x0101010101010101
	-#define REP8_7f 0x7f7f7f7f7f7f7f7f
	-#define REP8_80 0x8080808080808080
	-
	-/* Parameters and result. */
	-#define src1 x0
	-#define src2 x1
	-#define limit x2
	-#define result x0
	-
	-/* Internal variables. */
	-#define data1 x3
	-#define data1w w3
	-#define data2 x4
	-#define data2w w4
	-#define has_nul x5
	-#define diff x6
	-#define syndrome x7
	-#define tmp1 x8
	-#define tmp2 x9
	-#define tmp3 x10
	-#define zeroones x11
	-#define pos x12
	-#define limit_wd x13
	-#define mask x14
	-#define endloop x15
	-
	- .text
	- .p2align 6
	- .rep 7
	- nop /* Pad so that the loop below fits a cache line. */
	- .endr
	-def_fn strncmp
	- cbz limit, .Lret0
	- eor tmp1, src1, src2
	- mov zeroones, #REP8_01
	- tst tmp1, #7
	- b.ne .Lmisaligned8
	- ands tmp1, src1, #7
	- b.ne .Lmutual_align
	- /* Calculate the number of full and partial words -1. */
	- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
	- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
	-
	- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	- (=> (X - 1) & ~(X \| 0x7f)) is non-zero iff a byte is zero, and
	- can be done in parallel across the entire word. */
	- /* Start of performance-critical section -- one 64B cache line. */
	-.Lloop_aligned:
	- ldr data1, [src1], #8
	- ldr data2, [src2], #8
	-.Lstart_realigned:
	- subs limit_wd, limit_wd, #1
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- eor diff, data1, data2 /* Non-zero if differences found. */
	- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
	- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
	- ccmp endloop, #0, #0, eq
	- b.eq .Lloop_aligned
	- /* End of performance-critical section -- one 64B cache line. */
	-
	- /* Not reached the limit, must have found the end or a diff. */
	- tbz limit_wd, #63, .Lnot_limit
	-
	- /* Limit % 8 == 0 => all bytes significant. */
	- ands limit, limit, #7
	- b.eq .Lnot_limit
	-
	- lsl limit, limit, #3 /* Bits -> bytes. */
	- mov mask, #~0
	-#ifdef __AARCH64EB__
	- lsr mask, mask, limit
	-#else
	- lsl mask, mask, limit
	-#endif
	- bic data1, data1, mask
	- bic data2, data2, mask
	-
	- /* Make sure that the NUL byte is marked in the syndrome. */
	- orr has_nul, has_nul, mask
	-
	-.Lnot_limit:
	- orr syndrome, diff, has_nul
	-
	-#ifndef __AARCH64EB__
	- rev syndrome, syndrome
	- rev data1, data1
	- /* The MS-non-zero bit of the syndrome marks either the first bit
	- that is different, or the top bit of the first zero byte.
	- Shifting left now will bring the critical information into the
	- top bits. */
	- clz pos, syndrome
	- rev data2, data2
	- lsl data1, data1, pos
	- lsl data2, data2, pos
	- /* But we need to zero-extend (char is unsigned) the value and then
	- perform a signed 32-bit subtraction. */
	- lsr data1, data1, #56
	- sub result, data1, data2, lsr #56
	- ret
	-#else
	- /* For big-endian we cannot use the trick with the syndrome value
	- as carry-propagation can corrupt the upper bits if the trailing
	- bytes in the string contain 0x01. */
	- /* However, if there is no NUL byte in the dword, we can generate
	- the result directly. We can't just subtract the bytes as the
	- MSB might be significant. */
	- cbnz has_nul, 1f
	- cmp data1, data2
	- cset result, ne
	- cneg result, result, lo
	- ret
	-1:
	- /* Re-compute the NUL-byte detection, using a byte-reversed value. */
	- rev tmp3, data1
	- sub tmp1, tmp3, zeroones
	- orr tmp2, tmp3, #REP8_7f
	- bic has_nul, tmp1, tmp2
	- rev has_nul, has_nul
	- orr syndrome, diff, has_nul
	- clz pos, syndrome
	- /* The MS-non-zero bit of the syndrome marks either the first bit
	- that is different, or the top bit of the first zero byte.
	- Shifting left now will bring the critical information into the
	- top bits. */
	- lsl data1, data1, pos
	- lsl data2, data2, pos
	- /* But we need to zero-extend (char is unsigned) the value and then
	- perform a signed 32-bit subtraction. */
	- lsr data1, data1, #56
	- sub result, data1, data2, lsr #56
	- ret
	-#endif
	-
	-.Lmutual_align:
	- /* Sources are mutually aligned, but are not currently at an
	- alignment boundary. Round down the addresses and then mask off
	- the bytes that precede the start point.
	- We also need to adjust the limit calculations, but without
	- overflowing if the limit is near ULONG_MAX. */
	- bic src1, src1, #7
	- bic src2, src2, #7
	- ldr data1, [src1], #8
	- neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
	- ldr data2, [src2], #8
	- mov tmp2, #~0
	- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
	-#ifdef __AARCH64EB__
	- /* Big-endian. Early bytes are at MSB. */
	- lsl tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */
	-#else
	- /* Little-endian. Early bytes are at LSB. */
	- lsr tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */
	-#endif
	- and tmp3, limit_wd, #7
	- lsr limit_wd, limit_wd, #3
	- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
	- add limit, limit, tmp1
	- add tmp3, tmp3, tmp1
	- orr data1, data1, tmp2
	- orr data2, data2, tmp2
	- add limit_wd, limit_wd, tmp3, lsr #3
	- b .Lstart_realigned
	-
	-.Lret0:
	- mov result, #0
	- ret
	-
	- .p2align 6
	-.Lmisaligned8:
	- sub limit, limit, #1
	-1:
	- /* Perhaps we can do better than this. */
	- ldrb data1w, [src1], #1
	- ldrb data2w, [src2], #1
	- subs limit, limit, #1
	- ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */
	- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
	- b.eq 1b
	- sub result, data1, data2
	- ret
	- .size strncmp, . - strncmp
	diff --git a/contrib/cortex-strings/src/aarch64/strnlen.S b/contrib/cortex-strings/src/aarch64/strnlen.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/aarch64/strnlen.S
	+++ /dev/null
	@@ -1,181 +0,0 @@
	-/* strnlen - calculate the length of a string with limit.
	-
	- Copyright (c) 2013, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions are met:
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	- * Neither the name of the Linaro nor the
	- names of its contributors may be used to endorse or promote products
	- derived from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
	-
	-/* Assumptions:
	- *
	- * ARMv8-a, AArch64
	- */
	-
	-/* Arguments and results. */
	-#define srcin x0
	-#define len x0
	-#define limit x1
	-
	-/* Locals and temporaries. */
	-#define src x2
	-#define data1 x3
	-#define data2 x4
	-#define data2a x5
	-#define has_nul1 x6
	-#define has_nul2 x7
	-#define tmp1 x8
	-#define tmp2 x9
	-#define tmp3 x10
	-#define tmp4 x11
	-#define zeroones x12
	-#define pos x13
	-#define limit_wd x14
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-#define REP8_01 0x0101010101010101
	-#define REP8_7f 0x7f7f7f7f7f7f7f7f
	-#define REP8_80 0x8080808080808080
	-
	- .text
	- .p2align 6
	-.Lstart:
	- /* Pre-pad to ensure critical loop begins an icache line. */
	- .rep 7
	- nop
	- .endr
	- /* Put this code here to avoid wasting more space with pre-padding. */
	-.Lhit_limit:
	- mov len, limit
	- ret
	-
	-def_fn strnlen
	- cbz limit, .Lhit_limit
	- mov zeroones, #REP8_01
	- bic src, srcin, #15
	- ands tmp1, srcin, #15
	- b.ne .Lmisaligned
	- /* Calculate the number of full and partial words -1. */
	- sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
	- lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
	-
	- /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	- (=> (X - 1) & ~(X \| 0x7f)) is non-zero iff a byte is zero, and
	- can be done in parallel across the entire word. */
	- /* The inner loop deals with two Dwords at a time. This has a
	- slightly higher start-up cost, but we should win quite quickly,
	- especially on cores with a high number of issue slots per
	- cycle, as we get much better parallelism out of the operations. */
	-
	- /* Start of critial section -- keep to one 64Byte cache line. */
	-.Lloop:
	- ldp data1, data2, [src], #16
	-.Lrealigned:
	- sub tmp1, data1, zeroones
	- orr tmp2, data1, #REP8_7f
	- sub tmp3, data2, zeroones
	- orr tmp4, data2, #REP8_7f
	- bic has_nul1, tmp1, tmp2
	- bic has_nul2, tmp3, tmp4
	- subs limit_wd, limit_wd, #1
	- orr tmp1, has_nul1, has_nul2
	- ccmp tmp1, #0, #0, pl /* NZCV = 0000 */
	- b.eq .Lloop
	- /* End of critical section -- keep to one 64Byte cache line. */
	-
	- orr tmp1, has_nul1, has_nul2
	- cbz tmp1, .Lhit_limit /* No null in final Qword. */
	-
	- /* We know there's a null in the final Qword. The easiest thing
	- to do now is work out the length of the string and return
	- MIN (len, limit). */
	-
	- sub len, src, srcin
	- cbz has_nul1, .Lnul_in_data2
	-#ifdef __AARCH64EB__
	- mov data2, data1
	-#endif
	- sub len, len, #8
	- mov has_nul2, has_nul1
	-.Lnul_in_data2:
	-#ifdef __AARCH64EB__
	- /* For big-endian, carry propagation (if the final byte in the
	- string is 0x01) means we cannot use has_nul directly. The
	- easiest way to get the correct byte is to byte-swap the data
	- and calculate the syndrome a second time. */
	- rev data2, data2
	- sub tmp1, data2, zeroones
	- orr tmp2, data2, #REP8_7f
	- bic has_nul2, tmp1, tmp2
	-#endif
	- sub len, len, #8
	- rev has_nul2, has_nul2
	- clz pos, has_nul2
	- add len, len, pos, lsr #3 /* Bits to bytes. */
	- cmp len, limit
	- csel len, len, limit, ls /* Return the lower value. */
	- ret
	-
	-.Lmisaligned:
	- /* Deal with a partial first word.
	- We're doing two things in parallel here;
	- 1) Calculate the number of words (but avoiding overflow if
	- limit is near ULONG_MAX) - to do this we need to work out
	- limit + tmp1 - 1 as a 65-bit value before shifting it;
	- 2) Load and mask the initial data words - we force the bytes
	- before the ones we are interested in to 0xff - this ensures
	- early bytes will not hit any zero detection. */
	- sub limit_wd, limit, #1
	- neg tmp4, tmp1
	- cmp tmp1, #8
	-
	- and tmp3, limit_wd, #15
	- lsr limit_wd, limit_wd, #4
	- mov tmp2, #~0
	-
	- ldp data1, data2, [src], #16
	- lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
	- add tmp3, tmp3, tmp1
	-
	-#ifdef __AARCH64EB__
	- /* Big-endian. Early bytes are at MSB. */
	- lsl tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */
	-#else
	- /* Little-endian. Early bytes are at LSB. */
	- lsr tmp2, tmp2, tmp4 /* Shift (tmp1 & 63). */
	-#endif
	- add limit_wd, limit_wd, tmp3, lsr #4
	-
	- orr data1, data1, tmp2
	- orr data2a, data2, tmp2
	-
	- csinv data1, data1, xzr, le
	- csel data2, data2, data2a, le
	- b .Lrealigned
	- .size strnlen, . - .Lstart /* Include pre-padding in size. */
	diff --git a/contrib/cortex-strings/src/arm/memchr.S b/contrib/cortex-strings/src/arm/memchr.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/arm/memchr.S
	+++ /dev/null
	@@ -1,155 +0,0 @@
	-/* Copyright (c) 2010-2011, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions
	- are met:
	-
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	-
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	-
	- * Neither the name of Linaro Limited nor the names of its
	- contributors may be used to endorse or promote products derived
	- from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/*
	- Written by Dave Gilbert <david.gilbert@linaro.org>
	-
	- This memchr routine is optimised on a Cortex-A9 and should work on
	- all ARMv7 processors. It has a fast past for short sizes, and has
	- an optimised path for large data sets; the worst case is finding the
	- match early in a large data set.
	-
	- */
	-
	-@ 2011-02-07 david.gilbert@linaro.org
	-@ Extracted from local git a5b438d861
	-@ 2011-07-14 david.gilbert@linaro.org
	-@ Import endianness fix from local git ea786f1b
	-@ 2011-12-07 david.gilbert@linaro.org
	-@ Removed unneeded cbz from align loop
	-
	- .syntax unified
	- .arch armv7-a
	-
	-@ this lets us check a flag in a 00/ff byte easily in either endianness
	-#ifdef __ARMEB__
	-#define CHARTSTMASK(c) 1<<(31-(c*8))
	-#else
	-#define CHARTSTMASK(c) 1<<(c*8)
	-#endif
	- .text
	- .thumb
	-
	-@ ---------------------------------------------------------------------------
	- .thumb_func
	- .align 2
	- .p2align 4,,15
	- .global memchr
	- .type memchr,%function
	-memchr:
	- @ r0 = start of memory to scan
	- @ r1 = character to look for
	- @ r2 = length
	- @ returns r0 = pointer to character or NULL if not found
	- and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
	-
	- cmp r2,#16 @ If it's short don't bother with anything clever
	- blt 20f
	-
	- tst r0, #7 @ If it's already aligned skip the next bit
	- beq 10f
	-
	- @ Work up to an aligned point
	-5:
	- ldrb r3, [r0],#1
	- subs r2, r2, #1
	- cmp r3, r1
	- beq 50f @ If it matches exit found
	- tst r0, #7
	- bne 5b @ If not aligned yet then do next byte
	-
	-10:
	- @ At this point, we are aligned, we know we have at least 8 bytes to work with
	- push {r4,r5,r6,r7}
	- orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
	- orr r1, r1, r1, lsl #16
	- bic r4, r2, #7 @ Number of double words to work with
	- mvns r7, #0 @ all F's
	- movs r3, #0
	-
	-15:
	- ldmia r0!,{r5,r6}
	- subs r4, r4, #8
	- eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target
	- eor r6,r6, r1
	- uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
	- sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
	- uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
	- sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
	- cbnz r6, 60f
	- bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
	-
	- pop {r4,r5,r6,r7}
	- and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
	- and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
	-
	-20:
	- cbz r2, 40f @ 0 length or hit the end already then not found
	-
	-21: @ Post aligned section, or just a short call
	- ldrb r3,[r0],#1
	- subs r2,r2,#1
	- eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
	- cbz r3, 50f
	- bne 21b @ on r2 flags
	-
	-40:
	- movs r0,#0 @ not found
	- bx lr
	-
	-50:
	- subs r0,r0,#1 @ found
	- bx lr
	-
	-60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
	- @ r0 points to the start of the double word after the one that was tested
	- @ r5 has the 00/ff pattern for the first word, r6 has the chained value
	- cmp r5, #0
	- itte eq
	- moveq r5, r6 @ the end is in the 2nd word
	- subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
	- subne r0,r0,#7 @ or 2nd byte of 1st word
	-
	- @ r0 currently points to the 3rd byte of the word containing the hit
	- tst r5, # CHARTSTMASK(0) @ 1st character
	- bne 61f
	- adds r0,r0,#1
	- tst r5, # CHARTSTMASK(1) @ 2nd character
	- ittt eq
	- addeq r0,r0,#1
	- tsteq r5, # (3<<15) @ 2nd & 3rd character
	- @ If not the 3rd must be the last one
	- addeq r0,r0,#1
	-
	-61:
	- pop {r4,r5,r6,r7}
	- subs r0,r0,#1
	- bx lr
	diff --git a/contrib/cortex-strings/src/arm/memcpy.S b/contrib/cortex-strings/src/arm/memcpy.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/arm/memcpy.S
	+++ /dev/null
	@@ -1,617 +0,0 @@
	-/* Copyright (c) 2013, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions
	- are met:
	-
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	-
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	-
	- * Neither the name of Linaro Limited nor the names of its
	- contributors may be used to endorse or promote products derived
	- from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/*
	- This memcpy routine is optimised for Cortex-A15 cores and takes advantage
	- of VFP or NEON when built with the appropriate flags.
	-
	- Assumptions:
	-
	- ARMv6 (ARMv7-a if using Neon)
	- ARM state
	- Unaligned accesses
	-
	- */
	-
	- .syntax unified
	- /* This implementation requires ARM state. */
	- .arm
	-
	-#ifdef __ARM_NEON__
	-
	- .fpu neon
	- .arch armv7-a
	-# define FRAME_SIZE 4
	-# define USE_VFP
	-# define USE_NEON
	-
	-#elif !defined (__SOFTFP__)
	-
	- .arch armv6
	- .fpu vfpv2
	-# define FRAME_SIZE 32
	-# define USE_VFP
	-
	-#else
	- .arch armv6
	-# define FRAME_SIZE 32
	-
	-#endif
	-
	-/* Old versions of GAS incorrectly implement the NEON align semantics. */
	-#ifdef BROKEN_ASM_NEON_ALIGN
	-#define ALIGN(addr, align) addr,:align
	-#else
	-#define ALIGN(addr, align) addr:align
	-#endif
	-
	-#define PC_OFFSET 8 /* PC pipeline compensation. */
	-#define INSN_SIZE 4
	-
	-/* Call parameters. */
	-#define dstin r0
	-#define src r1
	-#define count r2
	-
	-/* Locals. */
	-#define tmp1 r3
	-#define dst ip
	-#define tmp2 r10
	-
	-#ifndef USE_NEON
	-/* For bulk copies using GP registers. */
	-#define A_l r2 /* Call-clobbered. */
	-#define A_h r3 /* Call-clobbered. */
	-#define B_l r4
	-#define B_h r5
	-#define C_l r6
	-#define C_h r7
	-#define D_l r8
	-#define D_h r9
	-#endif
	-
	-/* Number of lines ahead to pre-fetch data. If you change this the code
	- below will need adjustment to compensate. */
	-
	-#define prefetch_lines 5
	-
	-#ifdef USE_VFP
	- .macro cpy_line_vfp vreg, base
	- vstr \vreg, [dst, #\base]
	- vldr \vreg, [src, #\base]
	- vstr d0, [dst, #\base + 8]
	- vldr d0, [src, #\base + 8]
	- vstr d1, [dst, #\base + 16]
	- vldr d1, [src, #\base + 16]
	- vstr d2, [dst, #\base + 24]
	- vldr d2, [src, #\base + 24]
	- vstr \vreg, [dst, #\base + 32]
	- vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
	- vstr d0, [dst, #\base + 40]
	- vldr d0, [src, #\base + 40]
	- vstr d1, [dst, #\base + 48]
	- vldr d1, [src, #\base + 48]
	- vstr d2, [dst, #\base + 56]
	- vldr d2, [src, #\base + 56]
	- .endm
	-
	- .macro cpy_tail_vfp vreg, base
	- vstr \vreg, [dst, #\base]
	- vldr \vreg, [src, #\base]
	- vstr d0, [dst, #\base + 8]
	- vldr d0, [src, #\base + 8]
	- vstr d1, [dst, #\base + 16]
	- vldr d1, [src, #\base + 16]
	- vstr d2, [dst, #\base + 24]
	- vldr d2, [src, #\base + 24]
	- vstr \vreg, [dst, #\base + 32]
	- vstr d0, [dst, #\base + 40]
	- vldr d0, [src, #\base + 40]
	- vstr d1, [dst, #\base + 48]
	- vldr d1, [src, #\base + 48]
	- vstr d2, [dst, #\base + 56]
	- vldr d2, [src, #\base + 56]
	- .endm
	-#endif
	-
	- .macro def_fn f p2align=0
	- .text
	- .p2align \p2align
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-def_fn memcpy p2align=6
	-
	- mov dst, dstin /* Preserve dstin, we need to return it. */
	- cmp count, #64
	- bge .Lcpy_not_short
	- /* Deal with small copies quickly by dropping straight into the
	- exit block. */
	-
	-.Ltail63unaligned:
	-#ifdef USE_NEON
	- and tmp1, count, #0x38
	- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
	- add pc, pc, tmp1
	- vld1.8 {d0}, [src]! /* 14 words to go. */
	- vst1.8 {d0}, [dst]!
	- vld1.8 {d0}, [src]! /* 12 words to go. */
	- vst1.8 {d0}, [dst]!
	- vld1.8 {d0}, [src]! /* 10 words to go. */
	- vst1.8 {d0}, [dst]!
	- vld1.8 {d0}, [src]! /* 8 words to go. */
	- vst1.8 {d0}, [dst]!
	- vld1.8 {d0}, [src]! /* 6 words to go. */
	- vst1.8 {d0}, [dst]!
	- vld1.8 {d0}, [src]! /* 4 words to go. */
	- vst1.8 {d0}, [dst]!
	- vld1.8 {d0}, [src]! /* 2 words to go. */
	- vst1.8 {d0}, [dst]!
	-
	- tst count, #4
	- ldrne tmp1, [src], #4
	- strne tmp1, [dst], #4
	-#else
	- /* Copy up to 15 full words of data. May not be aligned. */
	- /* Cannot use VFP for unaligned data. */
	- and tmp1, count, #0x3c
	- add dst, dst, tmp1
	- add src, src, tmp1
	- rsb tmp1, tmp1, #(60 - PC_OFFSET/2 + INSN_SIZE/2)
	- /* Jump directly into the sequence below at the correct offset. */
	- add pc, pc, tmp1, lsl #1
	-
	- ldr tmp1, [src, #-60] /* 15 words to go. */
	- str tmp1, [dst, #-60]
	-
	- ldr tmp1, [src, #-56] /* 14 words to go. */
	- str tmp1, [dst, #-56]
	- ldr tmp1, [src, #-52]
	- str tmp1, [dst, #-52]
	-
	- ldr tmp1, [src, #-48] /* 12 words to go. */
	- str tmp1, [dst, #-48]
	- ldr tmp1, [src, #-44]
	- str tmp1, [dst, #-44]
	-
	- ldr tmp1, [src, #-40] /* 10 words to go. */
	- str tmp1, [dst, #-40]
	- ldr tmp1, [src, #-36]
	- str tmp1, [dst, #-36]
	-
	- ldr tmp1, [src, #-32] /* 8 words to go. */
	- str tmp1, [dst, #-32]
	- ldr tmp1, [src, #-28]
	- str tmp1, [dst, #-28]
	-
	- ldr tmp1, [src, #-24] /* 6 words to go. */
	- str tmp1, [dst, #-24]
	- ldr tmp1, [src, #-20]
	- str tmp1, [dst, #-20]
	-
	- ldr tmp1, [src, #-16] /* 4 words to go. */
	- str tmp1, [dst, #-16]
	- ldr tmp1, [src, #-12]
	- str tmp1, [dst, #-12]
	-
	- ldr tmp1, [src, #-8] /* 2 words to go. */
	- str tmp1, [dst, #-8]
	- ldr tmp1, [src, #-4]
	- str tmp1, [dst, #-4]
	-#endif
	-
	- lsls count, count, #31
	- ldrhcs tmp1, [src], #2
	- ldrbne src, [src] /* Src is dead, use as a scratch. */
	- strhcs tmp1, [dst], #2
	- strbne src, [dst]
	- bx lr
	-
	-.Lcpy_not_short:
	- /* At least 64 bytes to copy, but don't know the alignment yet. */
	- str tmp2, [sp, #-FRAME_SIZE]!
	- and tmp2, src, #7
	- and tmp1, dst, #7
	- cmp tmp1, tmp2
	- bne .Lcpy_notaligned
	-
	-#ifdef USE_VFP
	- /* Magic dust alert! Force VFP on Cortex-A9. Experiments show
	- that the FP pipeline is much better at streaming loads and
	- stores. This is outside the critical loop. */
	- vmov.f32 s0, s0
	-#endif
	-
	- /* SRC and DST have the same mutual 64-bit alignment, but we may
	- still need to pre-copy some bytes to get to natural alignment.
	- We bring SRC and DST into full 64-bit alignment. */
	- lsls tmp2, dst, #29
	- beq 1f
	- rsbs tmp2, tmp2, #0
	- sub count, count, tmp2, lsr #29
	- ldrmi tmp1, [src], #4
	- strmi tmp1, [dst], #4
	- lsls tmp2, tmp2, #2
	- ldrhcs tmp1, [src], #2
	- ldrbne tmp2, [src], #1
	- strhcs tmp1, [dst], #2
	- strbne tmp2, [dst], #1
	-
	-1:
	- subs tmp2, count, #64 /* Use tmp2 for count. */
	- blt .Ltail63aligned
	-
	- cmp tmp2, #512
	- bge .Lcpy_body_long
	-
	-.Lcpy_body_medium: /* Count in tmp2. */
	-#ifdef USE_VFP
	-1:
	- vldr d0, [src, #0]
	- subs tmp2, tmp2, #64
	- vldr d1, [src, #8]
	- vstr d0, [dst, #0]
	- vldr d0, [src, #16]
	- vstr d1, [dst, #8]
	- vldr d1, [src, #24]
	- vstr d0, [dst, #16]
	- vldr d0, [src, #32]
	- vstr d1, [dst, #24]
	- vldr d1, [src, #40]
	- vstr d0, [dst, #32]
	- vldr d0, [src, #48]
	- vstr d1, [dst, #40]
	- vldr d1, [src, #56]
	- vstr d0, [dst, #48]
	- add src, src, #64
	- vstr d1, [dst, #56]
	- add dst, dst, #64
	- bge 1b
	- tst tmp2, #0x3f
	- beq .Ldone
	-
	-.Ltail63aligned: /* Count in tmp2. */
	- and tmp1, tmp2, #0x38
	- add dst, dst, tmp1
	- add src, src, tmp1
	- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
	- add pc, pc, tmp1
	-
	- vldr d0, [src, #-56] /* 14 words to go. */
	- vstr d0, [dst, #-56]
	- vldr d0, [src, #-48] /* 12 words to go. */
	- vstr d0, [dst, #-48]
	- vldr d0, [src, #-40] /* 10 words to go. */
	- vstr d0, [dst, #-40]
	- vldr d0, [src, #-32] /* 8 words to go. */
	- vstr d0, [dst, #-32]
	- vldr d0, [src, #-24] /* 6 words to go. */
	- vstr d0, [dst, #-24]
	- vldr d0, [src, #-16] /* 4 words to go. */
	- vstr d0, [dst, #-16]
	- vldr d0, [src, #-8] /* 2 words to go. */
	- vstr d0, [dst, #-8]
	-#else
	- sub src, src, #8
	- sub dst, dst, #8
	-1:
	- ldrd A_l, A_h, [src, #8]
	- strd A_l, A_h, [dst, #8]
	- ldrd A_l, A_h, [src, #16]
	- strd A_l, A_h, [dst, #16]
	- ldrd A_l, A_h, [src, #24]
	- strd A_l, A_h, [dst, #24]
	- ldrd A_l, A_h, [src, #32]
	- strd A_l, A_h, [dst, #32]
	- ldrd A_l, A_h, [src, #40]
	- strd A_l, A_h, [dst, #40]
	- ldrd A_l, A_h, [src, #48]
	- strd A_l, A_h, [dst, #48]
	- ldrd A_l, A_h, [src, #56]
	- strd A_l, A_h, [dst, #56]
	- ldrd A_l, A_h, [src, #64]!
	- strd A_l, A_h, [dst, #64]!
	- subs tmp2, tmp2, #64
	- bge 1b
	- tst tmp2, #0x3f
	- bne 1f
	- ldr tmp2,[sp], #FRAME_SIZE
	- bx lr
	-1:
	- add src, src, #8
	- add dst, dst, #8
	-
	-.Ltail63aligned: /* Count in tmp2. */
	- /* Copy up to 7 d-words of data. Similar to Ltail63unaligned, but
	- we know that the src and dest are 64-bit aligned so we can use
	- LDRD/STRD to improve efficiency. */
	- /* TMP2 is now negative, but we don't care about that. The bottom
	- six bits still tell us how many bytes are left to copy. */
	-
	- and tmp1, tmp2, #0x38
	- add dst, dst, tmp1
	- add src, src, tmp1
	- rsb tmp1, tmp1, #(56 - PC_OFFSET + INSN_SIZE)
	- add pc, pc, tmp1
	- ldrd A_l, A_h, [src, #-56] /* 14 words to go. */
	- strd A_l, A_h, [dst, #-56]
	- ldrd A_l, A_h, [src, #-48] /* 12 words to go. */
	- strd A_l, A_h, [dst, #-48]
	- ldrd A_l, A_h, [src, #-40] /* 10 words to go. */
	- strd A_l, A_h, [dst, #-40]
	- ldrd A_l, A_h, [src, #-32] /* 8 words to go. */
	- strd A_l, A_h, [dst, #-32]
	- ldrd A_l, A_h, [src, #-24] /* 6 words to go. */
	- strd A_l, A_h, [dst, #-24]
	- ldrd A_l, A_h, [src, #-16] /* 4 words to go. */
	- strd A_l, A_h, [dst, #-16]
	- ldrd A_l, A_h, [src, #-8] /* 2 words to go. */
	- strd A_l, A_h, [dst, #-8]
	-
	-#endif
	- tst tmp2, #4
	- ldrne tmp1, [src], #4
	- strne tmp1, [dst], #4
	- lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
	- ldrhcs tmp1, [src], #2
	- ldrbne tmp2, [src]
	- strhcs tmp1, [dst], #2
	- strbne tmp2, [dst]
	-
	-.Ldone:
	- ldr tmp2, [sp], #FRAME_SIZE
	- bx lr
	-
	-.Lcpy_body_long: /* Count in tmp2. */
	-
	- /* Long copy. We know that there's at least (prefetch_lines * 64)
	- bytes to go. */
	-#ifdef USE_VFP
	- /* Don't use PLD. Instead, read some data in advance of the current
	- copy position into a register. This should act like a PLD
	- operation but we won't have to repeat the transfer. */
	-
	- vldr d3, [src, #0]
	- vldr d4, [src, #64]
	- vldr d5, [src, #128]
	- vldr d6, [src, #192]
	- vldr d7, [src, #256]
	-
	- vldr d0, [src, #8]
	- vldr d1, [src, #16]
	- vldr d2, [src, #24]
	- add src, src, #32
	-
	- subs tmp2, tmp2, #prefetch_lines * 64 * 2
	- blt 2f
	-1:
	- cpy_line_vfp d3, 0
	- cpy_line_vfp d4, 64
	- cpy_line_vfp d5, 128
	- add dst, dst, #3 * 64
	- add src, src, #3 * 64
	- cpy_line_vfp d6, 0
	- cpy_line_vfp d7, 64
	- add dst, dst, #2 * 64
	- add src, src, #2 * 64
	- subs tmp2, tmp2, #prefetch_lines * 64
	- bge 1b
	-
	-2:
	- cpy_tail_vfp d3, 0
	- cpy_tail_vfp d4, 64
	- cpy_tail_vfp d5, 128
	- add src, src, #3 * 64
	- add dst, dst, #3 * 64
	- cpy_tail_vfp d6, 0
	- vstr d7, [dst, #64]
	- vldr d7, [src, #64]
	- vstr d0, [dst, #64 + 8]
	- vldr d0, [src, #64 + 8]
	- vstr d1, [dst, #64 + 16]
	- vldr d1, [src, #64 + 16]
	- vstr d2, [dst, #64 + 24]
	- vldr d2, [src, #64 + 24]
	- vstr d7, [dst, #64 + 32]
	- add src, src, #96
	- vstr d0, [dst, #64 + 40]
	- vstr d1, [dst, #64 + 48]
	- vstr d2, [dst, #64 + 56]
	- add dst, dst, #128
	- add tmp2, tmp2, #prefetch_lines * 64
	- b .Lcpy_body_medium
	-#else
	- /* Long copy. Use an SMS style loop to maximize the I/O
	- bandwidth of the core. We don't have enough spare registers
	- to synthesise prefetching, so use PLD operations. */
	- /* Pre-bias src and dst. */
	- sub src, src, #8
	- sub dst, dst, #8
	- pld [src, #8]
	- pld [src, #72]
	- subs tmp2, tmp2, #64
	- pld [src, #136]
	- ldrd A_l, A_h, [src, #8]
	- strd B_l, B_h, [sp, #8]
	- ldrd B_l, B_h, [src, #16]
	- strd C_l, C_h, [sp, #16]
	- ldrd C_l, C_h, [src, #24]
	- strd D_l, D_h, [sp, #24]
	- pld [src, #200]
	- ldrd D_l, D_h, [src, #32]!
	- b 1f
	- .p2align 6
	-2:
	- pld [src, #232]
	- strd A_l, A_h, [dst, #40]
	- ldrd A_l, A_h, [src, #40]
	- strd B_l, B_h, [dst, #48]
	- ldrd B_l, B_h, [src, #48]
	- strd C_l, C_h, [dst, #56]
	- ldrd C_l, C_h, [src, #56]
	- strd D_l, D_h, [dst, #64]!
	- ldrd D_l, D_h, [src, #64]!
	- subs tmp2, tmp2, #64
	-1:
	- strd A_l, A_h, [dst, #8]
	- ldrd A_l, A_h, [src, #8]
	- strd B_l, B_h, [dst, #16]
	- ldrd B_l, B_h, [src, #16]
	- strd C_l, C_h, [dst, #24]
	- ldrd C_l, C_h, [src, #24]
	- strd D_l, D_h, [dst, #32]
	- ldrd D_l, D_h, [src, #32]
	- bcs 2b
	- /* Save the remaining bytes and restore the callee-saved regs. */
	- strd A_l, A_h, [dst, #40]
	- add src, src, #40
	- strd B_l, B_h, [dst, #48]
	- ldrd B_l, B_h, [sp, #8]
	- strd C_l, C_h, [dst, #56]
	- ldrd C_l, C_h, [sp, #16]
	- strd D_l, D_h, [dst, #64]
	- ldrd D_l, D_h, [sp, #24]
	- add dst, dst, #72
	- tst tmp2, #0x3f
	- bne .Ltail63aligned
	- ldr tmp2, [sp], #FRAME_SIZE
	- bx lr
	-#endif
	-
	-.Lcpy_notaligned:
	- pld [src]
	- pld [src, #64]
	- /* There's at least 64 bytes to copy, but there is no mutual
	- alignment. */
	- /* Bring DST to 64-bit alignment. */
	- lsls tmp2, dst, #29
	- pld [src, #(2 * 64)]
	- beq 1f
	- rsbs tmp2, tmp2, #0
	- sub count, count, tmp2, lsr #29
	- ldrmi tmp1, [src], #4
	- strmi tmp1, [dst], #4
	- lsls tmp2, tmp2, #2
	- ldrbne tmp1, [src], #1
	- ldrhcs tmp2, [src], #2
	- strbne tmp1, [dst], #1
	- strhcs tmp2, [dst], #2
	-1:
	- pld [src, #(3 * 64)]
	- subs count, count, #64
	- ldrmi tmp2, [sp], #FRAME_SIZE
	- bmi .Ltail63unaligned
	- pld [src, #(4 * 64)]
	-
	-#ifdef USE_NEON
	- vld1.8 {d0-d3}, [src]!
	- vld1.8 {d4-d7}, [src]!
	- subs count, count, #64
	- bmi 2f
	-1:
	- pld [src, #(4 * 64)]
	- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
	- vld1.8 {d0-d3}, [src]!
	- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
	- vld1.8 {d4-d7}, [src]!
	- subs count, count, #64
	- bpl 1b
	-2:
	- vst1.8 {d0-d3}, [ALIGN (dst, 64)]!
	- vst1.8 {d4-d7}, [ALIGN (dst, 64)]!
	- ands count, count, #0x3f
	-#else
	- /* Use an SMS style loop to maximize the I/O bandwidth. */
	- sub src, src, #4
	- sub dst, dst, #8
	- subs tmp2, count, #64 /* Use tmp2 for count. */
	- ldr A_l, [src, #4]
	- ldr A_h, [src, #8]
	- strd B_l, B_h, [sp, #8]
	- ldr B_l, [src, #12]
	- ldr B_h, [src, #16]
	- strd C_l, C_h, [sp, #16]
	- ldr C_l, [src, #20]
	- ldr C_h, [src, #24]
	- strd D_l, D_h, [sp, #24]
	- ldr D_l, [src, #28]
	- ldr D_h, [src, #32]!
	- b 1f
	- .p2align 6
	-2:
	- pld [src, #(5 * 64) - (32 - 4)]
	- strd A_l, A_h, [dst, #40]
	- ldr A_l, [src, #36]
	- ldr A_h, [src, #40]
	- strd B_l, B_h, [dst, #48]
	- ldr B_l, [src, #44]
	- ldr B_h, [src, #48]
	- strd C_l, C_h, [dst, #56]
	- ldr C_l, [src, #52]
	- ldr C_h, [src, #56]
	- strd D_l, D_h, [dst, #64]!
	- ldr D_l, [src, #60]
	- ldr D_h, [src, #64]!
	- subs tmp2, tmp2, #64
	-1:
	- strd A_l, A_h, [dst, #8]
	- ldr A_l, [src, #4]
	- ldr A_h, [src, #8]
	- strd B_l, B_h, [dst, #16]
	- ldr B_l, [src, #12]
	- ldr B_h, [src, #16]
	- strd C_l, C_h, [dst, #24]
	- ldr C_l, [src, #20]
	- ldr C_h, [src, #24]
	- strd D_l, D_h, [dst, #32]
	- ldr D_l, [src, #28]
	- ldr D_h, [src, #32]
	- bcs 2b
	-
	- /* Save the remaining bytes and restore the callee-saved regs. */
	- strd A_l, A_h, [dst, #40]
	- add src, src, #36
	- strd B_l, B_h, [dst, #48]
	- ldrd B_l, B_h, [sp, #8]
	- strd C_l, C_h, [dst, #56]
	- ldrd C_l, C_h, [sp, #16]
	- strd D_l, D_h, [dst, #64]
	- ldrd D_l, D_h, [sp, #24]
	- add dst, dst, #72
	- ands count, tmp2, #0x3f
	-#endif
	- ldr tmp2, [sp], #FRAME_SIZE
	- bne .Ltail63unaligned
	- bx lr
	-
	- .size memcpy, . - memcpy
	diff --git a/contrib/cortex-strings/src/arm/memset.S b/contrib/cortex-strings/src/arm/memset.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/arm/memset.S
	+++ /dev/null
	@@ -1,122 +0,0 @@
	-/* Copyright (c) 2010-2011, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions
	- are met:
	-
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	-
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	-
	- * Neither the name of Linaro Limited nor the names of its
	- contributors may be used to endorse or promote products derived
	- from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/*
	- Written by Dave Gilbert <david.gilbert@linaro.org>
	-
	- This memset routine is optimised on a Cortex-A9 and should work on
	- all ARMv7 processors.
	-
	- */
	-
	- .syntax unified
	- .arch armv7-a
	-
	-@ 2011-08-30 david.gilbert@linaro.org
	-@ Extracted from local git 2f11b436
	-
	-@ this lets us check a flag in a 00/ff byte easily in either endianness
	-#ifdef __ARMEB__
	-#define CHARTSTMASK(c) 1<<(31-(c*8))
	-#else
	-#define CHARTSTMASK(c) 1<<(c*8)
	-#endif
	- .text
	- .thumb
	-
	-@ ---------------------------------------------------------------------------
	- .thumb_func
	- .align 2
	- .p2align 4,,15
	- .global memset
	- .type memset,%function
	-memset: @ Fills r2 bytes starting at r0 with the low byte of r1
	- @ r0 = address
	- @ r1 = character
	- @ r2 = count
	- @ returns original address in r0
	-
	- mov r3, r0 @ Leave r0 alone; r3 is the running write pointer
	- cbz r2, 10f @ Exit if 0 length
	-
	- tst r0, #7 @ Is the start address already 8-byte aligned?
	- beq 2f @ Already aligned
	-
	- @ Ok, so we're misaligned here: store single bytes until r3 is 8-byte aligned
	-1:
	- strb r1, [r3], #1
	- subs r2,r2,#1
	- tst r3, #7 @ Sets Z for the bne below (cbz leaves the flags alone)
	- cbz r2, 10f @ Exit if we hit the end
	- bne 1b @ go round again if still misaligned
	-
	-2:
	- @ OK, so we're aligned
	- push {r4,r5,r6,r7}
	- bics r4, r2, #15 @ r4 = count rounded down to a multiple of 16; if less than 16 bytes then need to finish it off
	- beq 5f
	-
	-3:
	- @ POSIX says that ch is cast to an unsigned char. A uxtb is one
	- @ byte and takes two cycles, where an AND is four bytes but one
	- @ cycle.
	- and r1, #0xFF
	- orr r1, r1, r1, lsl#8 @ Same character into all bytes
	- orr r1, r1, r1, lsl#16
	- mov r5,r1 @ r1, r5, r6 and r7 all hold the replicated fill word
	- mov r6,r1
	- mov r7,r1
	-
	-4:
	- subs r4,r4,#16 @ Flags set here drive the bne below
	- stmia r3!,{r1,r5,r6,r7} @ Store 16 bytes and advance r3
	- bne 4b
	- and r2,r2,#15 @ r2 = bytes left over after the 16-byte chunks
	-
	- @ At this point we're still aligned and we have up to 15 bytes left to write
	- @ we can avoid some of the byte-at-a time now by testing for some big chunks
	- tst r2,#8
	- itt ne
	- subne r2,r2,#8
	- stmiane r3!,{r1,r5} @ Store 8 bytes in one go
	-
	-5:
	- pop {r4,r5,r6,r7}
	- cbz r2, 10f
	-
	- @ Got to do any last < alignment bytes
	-6:
	- subs r2,r2,#1
	- strb r1,[r3],#1 @ strb writes only the low byte of r1
	- bne 6b
	-
	-10:
	- bx lr @ goodbye
	diff --git a/contrib/cortex-strings/src/arm/strchr.S b/contrib/cortex-strings/src/arm/strchr.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/arm/strchr.S
	+++ /dev/null
	@@ -1,80 +0,0 @@
	-/* Copyright (c) 2010-2011, Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions
	- are met:
	-
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	-
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	-
	- * Neither the name of Linaro Limited nor the names of its
	- contributors may be used to endorse or promote products derived
	- from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/*
	- Written by Dave Gilbert <david.gilbert@linaro.org>
	-
	- A very simple strchr routine, from benchmarks on A9 it's a bit faster than
	- the current version in eglibc (2.12.1-0ubuntu14 package)
	- I don't think doing a word at a time version is worth it since a lot
	- of strchr cases are very short anyway.
	-
	- */
	-
	-@ 2011-02-07 david.gilbert@linaro.org
	-@ Extracted from local git a5b438d861
	-
	- .syntax unified
	- .arch armv7-a
	-
	- .text
	- .thumb
	-
	-@ ---------------------------------------------------------------------------
	-
	- .thumb_func
	- .align 2
	- .p2align 4,,15
	- .global strchr
	- .type strchr,%function
	-strchr: @ Byte-at-a-time scan for the low byte of r1 in the string at r0
	- @ r0 = start of string
	- @ r1 = character to match
	- @ returns NULL for no match, or a pointer to the match
	- and r1,r1, #255 @ Only the low 8 bits of the character take part in the comparison
	-
	-1:
	- ldrb r2,[r0],#1 @ Load the next byte; r0 now points one past it
	- cmp r2,r1 @ Result is consumed by the bne below (cbz does not touch the flags)
	- cbz r2,10f @ Reached the terminating NUL
	- bne 1b
	-
	- @ We're here if it matched
	-5:
	- subs r0,r0,#1 @ Undo the post-increment so r0 points at the matching byte
	- bx lr
	-
	-10:
	- @ We're here if we ran off the end
	- cmp r1, #0 @ Corner case - you're allowed to search for the nil and get a pointer to it
	- beq 5b @ A bit messy, if it's common we should branch at the start to a special loop
	- mov r0,#0 @ No match: return NULL
	- bx lr
	diff --git a/contrib/cortex-strings/src/arm/strcmp.S b/contrib/cortex-strings/src/arm/strcmp.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/arm/strcmp.S
	+++ /dev/null
	@@ -1,500 +0,0 @@
	-/*
	- * Copyright (c) 2012-2014 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* Implementation of strcmp for ARMv7 when DSP instructions are
	- available. Use ldrd to support wider loads, provided the data
	- is sufficiently aligned. Use saturating arithmetic to optimize
	- the compares. */
	-
	-/* Build Options:
	- STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
	- byte in the string. If comparing completely random strings
	- the pre-check will save time, since there is a very high
	- probability of a mismatch in the first character: we save
	- significant overhead if this is the common case. However,
	- if strings are likely to be identical (eg because we're
	- verifying a hit in a hash table), then this check is largely
	- redundant. */
	-
	-#define STRCMP_NO_PRECHECK 0
	-
	- /* This version uses Thumb-2 code. */
	- .thumb
	- .syntax unified
	-
	-#ifdef __ARM_BIG_ENDIAN
	-#define S2LO lsl
	-#define S2LOEQ lsleq
	-#define S2HI lsr
	-#define MSB 0x000000ff
	-#define LSB 0xff000000
	-#define BYTE0_OFFSET 24
	-#define BYTE1_OFFSET 16
	-#define BYTE2_OFFSET 8
	-#define BYTE3_OFFSET 0
	-#else /* not __ARM_BIG_ENDIAN */
	-#define S2LO lsr
	-#define S2LOEQ lsreq
	-#define S2HI lsl
	-#define BYTE0_OFFSET 0
	-#define BYTE1_OFFSET 8
	-#define BYTE2_OFFSET 16
	-#define BYTE3_OFFSET 24
	-#define MSB 0xff000000
	-#define LSB 0x000000ff
	-#endif /* not __ARM_BIG_ENDIAN */
	-
	- .macro def_fn f p2align=0 /* Open a global function named by f; p2align defaults to 0. */
	- .text
	- .p2align \p2align /* Align the entry point to 2^p2align bytes. */
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-/* Parameters and result. */
	-#define src1 r0
	-#define src2 r1
	-#define result r0 /* Overlaps src1. */
	-
	-/* Internal variables. */
	-#define tmp1 r4
	-#define tmp2 r5
	-#define const_m1 r12
	-
	-/* Additional internal variables for 64-bit aligned data. */
	-#define data1a r2
	-#define data1b r3
	-#define data2a r6
	-#define data2b r7
	-#define syndrome_a tmp1
	-#define syndrome_b tmp2
	-
	-/* Additional internal variables for 32-bit aligned data. */
	-#define data1 r2
	-#define data2 r3
	-#define syndrome tmp2
	-
	-
	- /* Macro to compute and return the result value for word-aligned
	- cases. synd is the syndrome word; d1 and d2 are the matching
	- words from src1 and src2. When restore_r6 is non-zero, r6/r7 are
	- reloaded from the stack frame. Both variants reload r4/r5, pop
	- the 16-byte frame and return with bx lr. */
	- .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
	-#ifdef __ARM_BIG_ENDIAN
	- /* If data1 contains a zero byte, then syndrome will contain a 1 in
	- bit 7 of that byte. Otherwise, the highest set bit in the
	- syndrome will highlight the first different bit. It is therefore
	- sufficient to extract the eight bits starting with the syndrome
	- bit. */
	- clz tmp1, \synd
	- lsl r1, \d2, tmp1
	- .if \restore_r6
	- ldrd r6, r7, [sp, #8]
	- .endif
	- .cfi_restore 6
	- .cfi_restore 7
	- lsl \d1, \d1, tmp1
	- .cfi_remember_state
	- lsr result, \d1, #24
	- ldrd r4, r5, [sp], #16 /* Reload r4/r5 and release the 16-byte frame. */
	- .cfi_restore 4
	- .cfi_restore 5
	- sub result, result, r1, lsr #24
	- bx lr
	-#else
	- /* To use the big-endian trick we'd have to reverse all three words.
	- that's slower than this approach. */
	- rev \synd, \synd
	- clz tmp1, \synd
	- bic tmp1, tmp1, #7 /* Round the bit index down to a byte boundary. */
	- lsr r1, \d2, tmp1
	- .cfi_remember_state
	- .if \restore_r6
	- ldrd r6, r7, [sp, #8]
	- .endif
	- .cfi_restore 6
	- .cfi_restore 7
	- lsr \d1, \d1, tmp1
	- and result, \d1, #255 /* Selected byte of d1. */
	- and r1, r1, #255 /* Selected byte of d2. */
	- ldrd r4, r5, [sp], #16 /* Reload r4/r5 and release the 16-byte frame. */
	- .cfi_restore 4
	- .cfi_restore 5
	- sub result, result, r1
	-
	- bx lr
	-#endif
	- .endm
	-
	- .text
	- .p2align 5
	-.Lstrcmp_start_addr:
	-#if STRCMP_NO_PRECHECK == 0
	-.Lfastpath_exit:
	- sub r0, r2, r3 /* Result = first byte of src1 minus first byte of src2. */
	- bx lr
	- nop
	-#endif
	-def_fn strcmp
	-#if STRCMP_NO_PRECHECK == 0
	- ldrb r2, [src1]
	- ldrb r3, [src2]
	- cmp r2, #1 /* C is clear (and Z clear) only when the src1 byte is NUL. */
	- it cs
	- cmpcs r2, r3 /* Non-NUL src1 byte: compare it with the src2 byte. */
	- bne .Lfastpath_exit /* Taken for a NUL first byte or a mismatch. */
	-#endif
	- .cfi_startproc
	- strd r4, r5, [sp, #-16]! /* Allocate a 16-byte frame; save r4/r5 at its base. */
	- .cfi_def_cfa_offset 16
	- .cfi_offset 4, -16
	- .cfi_offset 5, -12
	- orr tmp1, src1, src2
	- strd r6, r7, [sp, #8] /* Save r6/r7 in the upper half of the frame. */
	- .cfi_offset 6, -8
	- .cfi_offset 7, -4
	- mvn const_m1, #0 /* const_m1 = 0xffffffff. */
	- lsl r2, tmp1, #29 /* Keep only the low three bits of (src1 | src2). */
	- cbz r2, .Lloop_aligned8 /* Both pointers are 8-byte aligned. */
	-
	-.Lnot_aligned:
	- eor tmp1, src1, src2
	- tst tmp1, #7
	- bne .Lmisaligned8 /* The pointers differ in their low three bits. */
	-
	- /* Deal with mutual misalignment by aligning downwards and then
	- masking off the unwanted loaded data to prevent a difference. */
	- and tmp1, src1, #7
	- bic src1, src1, #7
	- and tmp2, tmp1, #3
	- bic src2, src2, #7
	- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
	- ldrd data1a, data1b, [src1], #16
	- tst tmp1, #4 /* Flags are consumed by the beq below; nothing in between sets them. */
	- ldrd data2a, data2b, [src2], #16
	- /* In thumb code we can't use MVN with a register shift, but
	- we do have ORN. */
	- S2HI tmp1, const_m1, tmp2
	- orn data1a, data1a, tmp1
	- orn data2a, data2a, tmp1
	- beq .Lstart_realigned8 /* Offset below 4: only the first word needs masking. */
	- orn data1b, data1b, tmp1
	- mov data1a, const_m1 /* Offset of 4 or more: the whole first word is padding. */
	- orn data2b, data2b, tmp1
	- mov data2a, const_m1
	- b .Lstart_realigned8
	-
	- /* Unroll the inner loop by a factor of 2, giving 16 bytes per
	- pass. Each syndrome byte produced by uadd8/eor/sel is the XOR
	- of the two data bytes where the src1 byte is non-zero, and 0xff
	- where the src1 byte is NUL, so a non-zero syndrome means a
	- difference or the end of the string. */
	- .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
	- .p2align 2 /* Always word aligned. */
	-.Lloop_aligned8:
	- ldrd data1a, data1b, [src1], #16
	- ldrd data2a, data2b, [src2], #16
	-.Lstart_realigned8:
	- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
	- eor syndrome_a, data1a, data2a
	- sel syndrome_a, syndrome_a, const_m1
	- cbnz syndrome_a, .Ldiff_in_a
	- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
	- eor syndrome_b, data1b, data2b
	- sel syndrome_b, syndrome_b, const_m1
	- cbnz syndrome_b, .Ldiff_in_b
	-
	- ldrd data1a, data1b, [src1, #-8] /* Second 8 bytes of this 16-byte pass. */
	- ldrd data2a, data2b, [src2, #-8]
	- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
	- eor syndrome_a, data1a, data2a
	- sel syndrome_a, syndrome_a, const_m1
	- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
	- eor syndrome_b, data1b, data2b
	- sel syndrome_b, syndrome_b, const_m1
	- /* Can't use CBZ for backwards branch. */
	- orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
	- beq .Lloop_aligned8
	-
	-.Ldiff_found:
	- cbnz syndrome_a, .Ldiff_in_a
	-
	-.Ldiff_in_b: /* Reached with syndrome_a == 0, so the difference is in the second word. */
	- strcmp_epilogue_aligned syndrome_b, data1b, data2b 1
	-
	-.Ldiff_in_a:
	- .cfi_restore_state
	- strcmp_epilogue_aligned syndrome_a, data1a, data2a 1
	-
	- .cfi_restore_state
	-.Lmisaligned8:
	- tst tmp1, #3 /* tmp1 still holds src1 ^ src2 from .Lnot_aligned. */
	- bne .Lmisaligned4 /* The pointers differ in their low two bits. */
	- ands tmp1, src1, #3
	- bne .Lmutual_align4 /* Same word offset, but not word aligned. */
	-
	- /* Unrolled by a factor of 2, to reduce the number of post-increment
	- operations. */
	-.Lloop_aligned4:
	- ldr data1, [src1], #8
	- ldr data2, [src2], #8
	-.Lstart_realigned4:
	- uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
	- eor syndrome, data1, data2
	- sel syndrome, syndrome, const_m1
	- cbnz syndrome, .Laligned4_done
	- ldr data1, [src1, #-4] /* Second word of this 8-byte pass. */
	- ldr data2, [src2, #-4]
	- uadd8 syndrome, data1, const_m1
	- eor syndrome, data1, data2
	- sel syndrome, syndrome, const_m1
	- cmp syndrome, #0
	- beq .Lloop_aligned4
	-
	-.Laligned4_done: /* restore_r6 is 0 here, so the epilogue does not reload r6/r7. */
	- strcmp_epilogue_aligned syndrome, data1, data2, 0
	-
	-.Lmutual_align4:
	- .cfi_restore_state
	- /* Deal with mutual misalignment by aligning downwards and then
	- masking off the unwanted loaded data to prevent a difference. */
	- lsl tmp1, tmp1, #3 /* Bytes -> bits. */
	- bic src1, src1, #3
	- ldr data1, [src1], #8
	- bic src2, src2, #3
	- ldr data2, [src2], #8
	-
	- /* In thumb code we can't use MVN with a register shift, but
	- we do have ORN. */
	- S2HI tmp1, const_m1, tmp1
	- orn data1, data1, tmp1
	- orn data2, data2, tmp1
	- b .Lstart_realigned4
	-
	-.Lmisaligned4:
	- ands tmp1, src1, #3 /* tmp1 = byte offset of src1 within its word. */
	- beq .Lsrc1_aligned
	- sub src2, src2, tmp1 /* Move src2 back by the same offset as src1. */
	- bic src1, src1, #3
	- lsls tmp1, tmp1, #31 /* Z = (offset bit 0 clear), C = offset bit 1. */
	- ldr data1, [src1], #4 /* ldr leaves the flags from lsls intact. */
	- beq .Laligned_m2 /* Offset was 2. */
	- bcs .Laligned_m1 /* Offset was 3. */
	-
	-#if STRCMP_NO_PRECHECK == 1
	- ldrb data2, [src2, #1]
	- uxtb tmp1, data1, ror #BYTE1_OFFSET
	- subs tmp1, tmp1, data2
	- bne .Lmisaligned_exit
	- cbz data2, .Lmisaligned_exit
	-
	-.Laligned_m2:
	- ldrb data2, [src2, #2]
	- uxtb tmp1, data1, ror #BYTE2_OFFSET
	- subs tmp1, tmp1, data2
	- bne .Lmisaligned_exit
	- cbz data2, .Lmisaligned_exit
	-
	-.Laligned_m1:
	- ldrb data2, [src2, #3]
	- uxtb tmp1, data1, ror #BYTE3_OFFSET
	- subs tmp1, tmp1, data2
	- bne .Lmisaligned_exit
	- add src2, src2, #4
	- cbnz data2, .Lsrc1_aligned
	-#else /* STRCMP_NO_PRECHECK */
	- /* If we've done the pre-check, then we don't need to check the
	- first byte again here. */
	- ldrb data2, [src2, #2]
	- uxtb tmp1, data1, ror #BYTE2_OFFSET
	- subs tmp1, tmp1, data2
	- bne .Lmisaligned_exit
	- cbz data2, .Lmisaligned_exit
	-
	-.Laligned_m2:
	- ldrb data2, [src2, #3]
	- uxtb tmp1, data1, ror #BYTE3_OFFSET
	- subs tmp1, tmp1, data2
	- bne .Lmisaligned_exit
	- cbnz data2, .Laligned_m1
	-#endif
	-
	-.Lmisaligned_exit:
	- .cfi_remember_state
	- mov result, tmp1 /* tmp1 holds the byte difference (0 when both bytes were NUL). */
	- ldr r4, [sp], #16 /* Only r4 is reloaded on this exit; the frame is popped here too. */
	- .cfi_restore 4
	- bx lr
	-
	-#if STRCMP_NO_PRECHECK == 0
	-.Laligned_m1:
	- add src2, src2, #4
	-#endif
	-.Lsrc1_aligned:
	- .cfi_restore_state
	- /* src1 is word aligned, but src2 has no common alignment
	- with it. */
	- ldr data1, [src1], #4
	- lsls tmp1, src2, #31 /* C=src2[1], Z=!src2[0]. */
	-
	- bic src2, src2, #3
	- ldr data2, [src2], #4
	- bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
	- bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
	-
	- /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */
	-.Loverlap3:
	- bic tmp1, data1, #MSB
	- uadd8 syndrome, data1, const_m1
	- eors syndrome, tmp1, data2, S2LO #8 /* Compare three bytes of data1 with the shifted data2. */
	- sel syndrome, syndrome, const_m1
	- bne 4f /* Flags come from the eors: those three bytes differ. */
	- cbnz syndrome, 5f /* No difference, so data1 must contain a NUL. */
	- ldr data2, [src2], #4
	- eor tmp1, tmp1, data1 /* Isolate the remaining byte of data1. */
	- cmp tmp1, data2, S2HI #24
	- bne 6f
	- ldr data1, [src1], #4
	- b .Loverlap3
	-4:
	- S2LO data2, data2, #8
	- b .Lstrcmp_tail
	-
	-5:
	- bics syndrome, syndrome, #MSB
	- bne .Lstrcmp_done_equal /* NUL lies within the three bytes already found equal. */
	-
	- /* We can only get here if the MSB of data1 contains 0, so
	- fast-path the exit. */
	- ldrb result, [src2]
	- .cfi_remember_state
	- ldrd r4, r5, [sp], #16
	- .cfi_restore 4
	- .cfi_restore 5
	- /* R6/7 Not used in this sequence. */
	- .cfi_restore 6
	- .cfi_restore 7
	- neg result, result /* The src1 byte is 0, so the result is minus the src2 byte. */
	- bx lr
	-
	-6:
	- .cfi_restore_state
	- S2LO data1, data1, #24
	- and data2, data2, #LSB
	- b .Lstrcmp_tail
	-
	- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
	-.Loverlap2:
	- and tmp1, data1, const_m1, S2LO #16 /* Keep two bytes of data1. */
	- uadd8 syndrome, data1, const_m1
	- eors syndrome, tmp1, data2, S2LO #16 /* Compare them with the shifted data2. */
	- sel syndrome, syndrome, const_m1
	- bne 4f /* Flags come from the eors: those two bytes differ. */
	- cbnz syndrome, 5f /* No difference, so data1 must contain a NUL. */
	- ldr data2, [src2], #4
	- eor tmp1, tmp1, data1 /* Isolate the other two bytes of data1. */
	- cmp tmp1, data2, S2HI #16
	- bne 6f
	- ldr data1, [src1], #4
	- b .Loverlap2
	-4:
	- S2LO data2, data2, #16
	- b .Lstrcmp_tail
	-5:
	- ands syndrome, syndrome, const_m1, S2LO #16
	- bne .Lstrcmp_done_equal /* NUL lies within the two bytes already found equal. */
	-
	- ldrh data2, [src2]
	- S2LO data1, data1, #16
	-#ifdef __ARM_BIG_ENDIAN
	- lsl data2, data2, #16
	-#endif
	- b .Lstrcmp_tail
	-
	-6:
	- S2LO data1, data1, #16
	- and data2, data2, const_m1, S2LO #16
	- b .Lstrcmp_tail
	-
	- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
	-.Loverlap1:
	- and tmp1, data1, #LSB /* Keep one byte of data1. */
	- uadd8 syndrome, data1, const_m1
	- eors syndrome, tmp1, data2, S2LO #24 /* Compare it with the shifted data2. */
	- sel syndrome, syndrome, const_m1
	- bne 4f /* Flags come from the eors: that byte differs. */
	- cbnz syndrome, 5f /* No difference, so data1 must contain a NUL. */
	- ldr data2, [src2], #4
	- eor tmp1, tmp1, data1 /* Isolate the other three bytes of data1. */
	- cmp tmp1, data2, S2HI #8
	- bne 6f
	- ldr data1, [src1], #4
	- b .Loverlap1
	-4:
	- S2LO data2, data2, #24
	- b .Lstrcmp_tail
	-5:
	- tst syndrome, #LSB
	- bne .Lstrcmp_done_equal /* NUL is in the byte already found equal. */
	- ldr data2, [src2]
	-6:
	- S2LO data1, data1, #8
	- bic data2, data2, #MSB
	- b .Lstrcmp_tail
	-
	-.Lstrcmp_done_equal:
	- mov result, #0 /* Strings compare equal. */
	- .cfi_remember_state
	- ldrd r4, r5, [sp], #16 /* Reload r4/r5 and release the 16-byte frame. */
	- .cfi_restore 4
	- .cfi_restore 5
	- /* R6/7 not used in this sequence. */
	- .cfi_restore 6
	- .cfi_restore 7
	- bx lr
	-
	-.Lstrcmp_tail:
	- .cfi_restore_state
	-#ifndef __ARM_BIG_ENDIAN
	- rev data1, data1
	- rev data2, data2
	- /* Now everything looks big-endian... */
	-#endif
	- uadd8 tmp1, data1, const_m1 /* Only the GE flags are needed; tmp1 is overwritten next. */
	- eor tmp1, data1, data2
	- sel syndrome, tmp1, const_m1
	- clz tmp1, syndrome
	- lsl data1, data1, tmp1 /* Shift both words left by the syndrome's leading-zero count. */
	- lsl data2, data2, tmp1
	- lsr result, data1, #24 /* Top byte of the shifted data1. */
	- ldrd r4, r5, [sp], #16 /* Reload r4/r5 and release the 16-byte frame. */
	- .cfi_restore 4
	- .cfi_restore 5
	- /* R6/7 not used in this sequence. */
	- .cfi_restore 6
	- .cfi_restore 7
	- sub result, result, data2, lsr #24 /* Minus the top byte of the shifted data2. */
	- bx lr
	- .cfi_endproc
	- .size strcmp, . - .Lstrcmp_start_addr
	diff --git a/contrib/cortex-strings/src/thumb-2/strcpy.c b/contrib/cortex-strings/src/thumb-2/strcpy.c
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/thumb-2/strcpy.c
	+++ /dev/null
	@@ -1,173 +0,0 @@
	-/*
	- * Copyright (c) 2008 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* For GLIBC:
	-#include <string.h>
	-#include <memcopy.h>
	-
	-#undef strcmp
	-*/
	-
	-#ifdef __thumb2__
	-#define magic1(REG) "#0x01010101"
	-#define magic2(REG) "#0x80808080"
	-#else
	-#define magic1(REG) #REG
	-#define magic2(REG) #REG ", lsl #7"
	-#endif
	-
	-char* __attribute__((naked)) /* Naked: every path in the asm body returns with its own BX LR. */
	-strcpy (char* dst, const char* src)
	-{
	- asm (
	-#if !(defined(__OPTIMIZE_SIZE__) \|\| defined (PREFER_SIZE_OVER_SPEED) \|\| \
	- (defined (__thumb__) && !defined (__thumb2__)))
	- "pld [r1, #0]\n\t"
	- "eor r2, r0, r1\n\t"
	- "mov ip, r0\n\t" /* ip is the running destination pointer; r0 is never written. */
	- "tst r2, #3\n\t"
	- "bne 4f\n\t" /* dst and src differ in their low two bits: copy bytewise. */
	- "tst r1, #3\n\t"
	- "bne 3f\n" /* Same offset within a word, but not word aligned. */
	- "5:\n\t"
	-#ifndef __thumb2__
	- "str r5, [sp, #-4]!\n\t"
	- "mov r5, #0x01\n\t"
	- "orr r5, r5, r5, lsl #8\n\t"
	- "orr r5, r5, r5, lsl #16\n\t" /* r5 = 0x01010101, the operand magic1/magic2 expand to here. */
	-#endif
	-
	- "str r4, [sp, #-4]!\n\t"
	- "tst r1, #4\n\t"
	- "ldr r3, [r1], #4\n\t"
	- "beq 2f\n\t"
	- "sub r2, r3, "magic1(r5)"\n\t" /* NUL test: (w - 0x01010101) & ~w & 0x80808080. */
	- "bics r2, r2, r3\n\t"
	- "tst r2, "magic2(r5)"\n\t"
	- "itt eq\n\t"
	- "streq r3, [ip], #4\n\t"
	- "ldreq r3, [r1], #4\n"
	- "bne 1f\n\t"
	- /* Inner loop. We now know that r1 is 64-bit aligned, so we
	- can safely fetch up to two words. This allows us to avoid
	- load stalls. */
	- ".p2align 2\n"
	- "2:\n\t"
	- "pld [r1, #8]\n\t"
	- "ldr r4, [r1], #4\n\t"
	- "sub r2, r3, "magic1(r5)"\n\t"
	- "bics r2, r2, r3\n\t"
	- "tst r2, "magic2(r5)"\n\t"
	- "sub r2, r4, "magic1(r5)"\n\t" /* Starts the test of r4; sub leaves the tst flags intact. */
	- "bne 1f\n\t"
	- "str r3, [ip], #4\n\t"
	- "bics r2, r2, r4\n\t"
	- "tst r2, "magic2(r5)"\n\t"
	- "itt eq\n\t"
	- "ldreq r3, [r1], #4\n\t"
	- "streq r4, [ip], #4\n\t"
	- "beq 2b\n\t"
	- "mov r3, r4\n"
	- "1:\n\t" /* r3 holds the word containing a NUL: store it byte by byte. */
	-#ifdef __ARMEB__
	- "rors r3, r3, #24\n\t"
	-#endif
	- "strb r3, [ip], #1\n\t"
	- "tst r3, #0xff\n\t"
	-#ifdef __ARMEL__
	- "ror r3, r3, #8\n\t"
	-#endif
	- "bne 1b\n\t"
	- "ldr r4, [sp], #4\n\t"
	-#ifndef __thumb2__
	- "ldr r5, [sp], #4\n\t"
	-#endif
	- "BX LR\n"
	-
	- /* Strings have the same offset from word alignment, but it's
	- not zero. */
	- "3:\n\t"
	- "tst r1, #1\n\t"
	- "beq 1f\n\t"
	- "ldrb r2, [r1], #1\n\t"
	- "strb r2, [ip], #1\n\t"
	- "cmp r2, #0\n\t"
	- "it eq\n"
	- "BXEQ LR\n"
	- "1:\n\t"
	- "tst r1, #2\n\t"
	- "beq 5b\n\t"
	- "ldrh r2, [r1], #2\n\t"
	-#ifdef __ARMEB__
	- "tst r2, #0xff00\n\t"
	- "iteet ne\n\t"
	- "strneh r2, [ip], #2\n\t"
	- "lsreq r2, r2, #8\n\t"
	- "streqb r2, [ip]\n\t"
	- "tstne r2, #0xff\n\t"
	-#else
	- "tst r2, #0xff\n\t"
	- "itet ne\n\t"
	- "strneh r2, [ip], #2\n\t"
	- "streqb r2, [ip]\n\t"
	- "tstne r2, #0xff00\n\t"
	-#endif
	- "bne 5b\n\t" /* Neither byte of the halfword was NUL: continue with words. */
	- "BX LR\n"
	-
	- /* src and dst do not have a common word alignment. Fall back to
	- byte copying. */
	- "4:\n\t"
	- "ldrb r2, [r1], #1\n\t"
	- "strb r2, [ip], #1\n\t"
	- "cmp r2, #0\n\t"
	- "bne 4b\n\t"
	- "BX LR"
	-
	-#elif !defined (__thumb__) \|\| defined (__thumb2__)
	- "mov r3, r0\n\t" /* Plain byte loop using post-indexed loads and stores. */
	- "1:\n\t"
	- "ldrb r2, [r1], #1\n\t"
	- "strb r2, [r3], #1\n\t"
	- "cmp r2, #0\n\t"
	- "bne 1b\n\t"
	- "BX LR"
	-#else
	- "mov r3, r0\n\t" /* Same byte loop, with the pointer increments as separate adds. */
	- "1:\n\t"
	- "ldrb r2, [r1]\n\t"
	- "add r1, r1, #1\n\t"
	- "strb r2, [r3]\n\t"
	- "add r3, r3, #1\n\t"
	- "cmp r2, #0\n\t"
	- "bne 1b\n\t"
	- "BX LR"
	-#endif
	- );
	-}
	-/* For GLIBC: libc_hidden_builtin_def (strcpy) */
	diff --git a/contrib/cortex-strings/src/thumb-2/strlen.S b/contrib/cortex-strings/src/thumb-2/strlen.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/thumb-2/strlen.S
	+++ /dev/null
	@@ -1,150 +0,0 @@
	-/* Copyright (c) 2010-2011,2013 Linaro Limited
	- All rights reserved.
	-
	- Redistribution and use in source and binary forms, with or without
	- modification, are permitted provided that the following conditions
	- are met:
	-
	- * Redistributions of source code must retain the above copyright
	- notice, this list of conditions and the following disclaimer.
	-
	- * Redistributions in binary form must reproduce the above copyright
	- notice, this list of conditions and the following disclaimer in the
	- documentation and/or other materials provided with the distribution.
	-
	- * Neither the name of Linaro Limited nor the names of its
	- contributors may be used to endorse or promote products derived
	- from this software without specific prior written permission.
	-
	- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/*
	- Assumes:
	- ARMv6T2, AArch32
	-
	- */
	-
	- .macro def_fn f p2align=0 /* Open a global function named by f; p2align defaults to 0. */
	- .text
	- .p2align \p2align /* Align the entry point to 2^p2align bytes. */
	- .global \f
	- .type \f, %function
	-\f:
	- .endm
	-
	-#ifdef __ARMEB__
	-#define S2LO lsl
	-#define S2HI lsr
	-#else
	-#define S2LO lsr
	-#define S2HI lsl
	-#endif
	-
	- /* This code requires Thumb. */
	- .thumb
	- .syntax unified
	-
	-/* Parameters and result. */
	-#define srcin r0
	-#define result r0
	-
	-/* Internal variables. */
	-#define src r1
	-#define data1a r2
	-#define data1b r3
	-#define const_m1 r12
	-#define const_0 r4
	-#define tmp1 r4 /* Overlaps const_0 */
	-#define tmp2 r5
	-
	-def_fn strlen p2align=6
	- pld [srcin, #0]
	- strd r4, r5, [sp, #-8]! /* Save r4/r5 in an 8-byte frame. */
	- bic src, srcin, #7 /* src = srcin rounded down to an 8-byte boundary. */
	- mvn const_m1, #0 /* const_m1 = 0xffffffff. */
	- ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
	- pld [src, #32]
	- bne.w .Lmisaligned8
	- mov const_0, #0
	- mov result, #-8 /* The loop adds 8 before the first check, so start at -8. */
	-.Lloop_aligned:
	- /* Bytes 0-7. */
	- ldrd data1a, data1b, [src]
	- pld [src, #64]
	- add result, result, #8
	-.Lstart_realigned:
	- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
	- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
	- uadd8 data1b, data1b, const_m1
	- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
	- cbnz data1b, .Lnull_found /* Non-zero when either word contains a NUL byte. */
	-
	- /* Bytes 8-15. */
	- ldrd data1a, data1b, [src, #8]
	- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
	- add result, result, #8
	- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
	- uadd8 data1b, data1b, const_m1
	- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
	- cbnz data1b, .Lnull_found
	-
	- /* Bytes 16-23. */
	- ldrd data1a, data1b, [src, #16]
	- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
	- add result, result, #8
	- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
	- uadd8 data1b, data1b, const_m1
	- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
	- cbnz data1b, .Lnull_found
	-
	- /* Bytes 24-31. */
	- ldrd data1a, data1b, [src, #24]
	- add src, src, #32 /* Advance to the next 32-byte group. */
	- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
	- add result, result, #8
	- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
	- uadd8 data1b, data1b, const_m1
	- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
	- cmp data1b, #0
	- beq .Lloop_aligned
	-
	-.Lnull_found:
	- cmp data1a, #0
	- itt eq
	- addeq result, result, #4 /* No NUL in the first word: skip its 4 bytes. */
	- moveq data1a, data1b
	-#ifndef __ARMEB__
	- rev data1a, data1a
	-#endif
	- clz data1a, data1a
	- ldrd r4, r5, [sp], #8 /* Reload r4/r5 and release the frame. */
	- add result, result, data1a, lsr #3 /* Bits -> Bytes. */
	- bx lr
	-
	-.Lmisaligned8:
	- ldrd data1a, data1b, [src]
	- and tmp2, tmp1, #3
	- rsb result, tmp1, #0 /* result = -(offset of srcin within its 8-byte block). */
	- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
	- tst tmp1, #4 /* Flags are consumed by the itt ne below. */
	- pld [src, #64]
	- S2HI tmp2, const_m1, tmp2
	- orn data1a, data1a, tmp2 /* Force the bytes before srcin to be non-zero. */
	- itt ne
	- ornne data1b, data1b, tmp2
	- movne data1a, const_m1 /* Offset of 4 or more: the whole first word precedes srcin. */
	- mov const_0, #0 /* const_0 shares r4 with tmp1, so it is zeroed only now. */
	- b .Lstart_realigned
	- .size strlen, . - strlen
	-
	diff --git a/contrib/cortex-strings/src/thumb/aeabi_idiv.S b/contrib/cortex-strings/src/thumb/aeabi_idiv.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/thumb/aeabi_idiv.S
	+++ /dev/null
	@@ -1,318 +0,0 @@
	-/*
	- * Copyright (c) 2014 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* An executable stack is not required for these functions. */
	-
	-.section .note.GNU-stack,"",%progbits
	-.previous
	-.eabi_attribute 25, 1
	-
	-/* ANSI concatenation macros. */
	-
	-#define CONCAT1(a, b) CONCAT2(a, b)
	-#define CONCAT2(a, b) a ## b
	-
	-/* Use the right prefix for global labels. */
	-
	-#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
	-
	-#define TYPE(x) .type SYM(x),function
	-#define SIZE(x) .size SYM(x), . - SYM(x)
	-#define LSYM(x) .x
	-
	-.macro cfi_start start_label, end_label
	- .pushsection .debug_frame
	-LSYM(Lstart_frame):
	- .4byte LSYM(Lend_cie) - LSYM(Lstart_cie)
	-LSYM(Lstart_cie):
	- .4byte 0xffffffff
	- .byte 0x1
	- .ascii "\0"
	- .uleb128 0x1
	- .sleb128 -4
	- .byte 0xe
	- .byte 0xc
	- .uleb128 0xd
	- .uleb128 0x0
	-
	- .align 2
	-LSYM(Lend_cie):
	- .4byte LSYM(Lend_fde)-LSYM(Lstart_fde)
	-LSYM(Lstart_fde):
	- .4byte LSYM(Lstart_frame)
	- .4byte \start_label
	- .4byte \end_label-\start_label
	- .popsection
	-.endm
	-
	-.macro cfi_end end_label
	- .pushsection .debug_frame
	- .align 2
	-LSYM(Lend_fde):
	- .popsection
	-\end_label:
	-.endm
	-
	-.macro THUMB_LDIV0 name signed
	- push {r0, lr}
	- movs r0, #0
	- bl SYM(__aeabi_idiv0)
	- pop {r1, pc}
	-.endm
	-
	-.macro FUNC_END name
	- SIZE (__\name)
	-.endm
	-
	-.macro DIV_FUNC_END name signed
	- cfi_start __\name, LSYM(Lend_div0)
	-LSYM(Ldiv0):
	- THUMB_LDIV0 \name \signed
	- cfi_end LSYM(Lend_div0)
	- FUNC_END \name
	-.endm
	-
	-.macro THUMB_FUNC_START name
	- .globl SYM (\name)
	- TYPE (\name)
	- .thumb_func
	-SYM (\name):
	-.endm
	-
	-.macro FUNC_START name
	- .text
	- .globl SYM (__\name)
	- TYPE (__\name)
	- .align 0
	- .force_thumb
	- .thumb_func
	- .syntax unified
	-SYM (__\name):
	-.endm
	-
	-.macro FUNC_ALIAS new old
	- .globl SYM (__\new)
	- .thumb_set SYM (__\new), SYM (__\old)
	-.endm
	-
	-/* Register aliases. */
	-work .req r4
	-dividend .req r0
	-divisor .req r1
	-overdone .req r2
	-result .req r2
	-curbit .req r3
	-
	-/* ------------------------------------------------------------------------ */
	-/* Bodies of the division and modulo routines. */
	-/* ------------------------------------------------------------------------ */
	-.macro BranchToDiv n, label
	- lsrs curbit, dividend, \n
	- cmp curbit, divisor
	- bcc \label
	-.endm
	-
	-.macro DoDiv n
	- lsrs curbit, dividend, \n
	- cmp curbit, divisor
	- bcc 1f
	- lsls curbit, divisor, \n
	- subs dividend, dividend, curbit
	-
	-1: adcs result, result
	-.endm
	-
	-.macro THUMB1_Div_Positive
	- movs result, #0
	- BranchToDiv #1, LSYM(Lthumb1_div1)
	- BranchToDiv #4, LSYM(Lthumb1_div4)
	- BranchToDiv #8, LSYM(Lthumb1_div8)
	- BranchToDiv #12, LSYM(Lthumb1_div12)
	- BranchToDiv #16, LSYM(Lthumb1_div16)
	-LSYM(Lthumb1_div_large_positive):
	- movs result, #0xff
	- lsls divisor, divisor, #8
	- rev result, result
	- lsrs curbit, dividend, #16
	- cmp curbit, divisor
	- bcc 1f
	- asrs result, #8
	- lsls divisor, divisor, #8
	- beq LSYM(Ldivbyzero_waypoint)
	-
	-1: lsrs curbit, dividend, #12
	- cmp curbit, divisor
	- bcc LSYM(Lthumb1_div12)
	- b LSYM(Lthumb1_div16)
	-LSYM(Lthumb1_div_loop):
	- lsrs divisor, divisor, #8
	-LSYM(Lthumb1_div16):
	- Dodiv #15
	- Dodiv #14
	- Dodiv #13
	- Dodiv #12
	-LSYM(Lthumb1_div12):
	- Dodiv #11
	- Dodiv #10
	- Dodiv #9
	- Dodiv #8
	- bcs LSYM(Lthumb1_div_loop)
	-LSYM(Lthumb1_div8):
	- Dodiv #7
	- Dodiv #6
	- Dodiv #5
	-LSYM(Lthumb1_div5):
	- Dodiv #4
	-LSYM(Lthumb1_div4):
	- Dodiv #3
	-LSYM(Lthumb1_div3):
	- Dodiv #2
	-LSYM(Lthumb1_div2):
	- Dodiv #1
	-LSYM(Lthumb1_div1):
	- subs divisor, dividend, divisor
	- bcs 1f
	- mov divisor, dividend
	-
	-1: adcs result, result
	- mov dividend, result
	- bx lr
	-
	-LSYM(Ldivbyzero_waypoint):
	- b LSYM(Ldiv0)
	-.endm
	-
	-.macro THUMB1_Div_Negative
	- lsrs result, divisor, #31
	- beq 1f
	- rsbs divisor, divisor, #0
	-
	-1: asrs curbit, dividend, #32
	- bcc 2f
	- rsbs dividend, dividend, #0
	-
	-2: eors curbit, result
	- movs result, #0
	- mov ip, curbit
	- BranchToDiv #4, LSYM(Lthumb1_div_negative4)
	- BranchToDiv #8, LSYM(Lthumb1_div_negative8)
	-LSYM(Lthumb1_div_large):
	- movs result, #0xfc
	- lsls divisor, divisor, #6
	- rev result, result
	- lsrs curbit, dividend, #8
	- cmp curbit, divisor
	- bcc LSYM(Lthumb1_div_negative8)
	-
	- lsls divisor, divisor, #6
	- asrs result, result, #6
	- cmp curbit, divisor
	- bcc LSYM(Lthumb1_div_negative8)
	-
	- lsls divisor, divisor, #6
	- asrs result, result, #6
	- cmp curbit, divisor
	- bcc LSYM(Lthumb1_div_negative8)
	-
	- lsls divisor, divisor, #6
	- beq LSYM(Ldivbyzero_negative)
	- asrs result, result, #6
	- b LSYM(Lthumb1_div_negative8)
	-LSYM(Lthumb1_div_negative_loop):
	- lsrs divisor, divisor, #6
	-LSYM(Lthumb1_div_negative8):
	- DoDiv #7
	- DoDiv #6
	- DoDiv #5
	- DoDiv #4
	-LSYM(Lthumb1_div_negative4):
	- DoDiv #3
	- DoDiv #2
	- bcs LSYM(Lthumb1_div_negative_loop)
	- DoDiv #1
	- subs divisor, dividend, divisor
	- bcs 1f
	- mov divisor, dividend
	-
	-1: mov curbit, ip
	- adcs result, result
	- asrs curbit, curbit, #1
	- mov dividend, result
	- bcc 2f
	- rsbs dividend, dividend, #0
	- cmp curbit, #0
	-
	-2: bpl 3f
	- rsbs divisor, divisor, #0
	-
	-3: bx lr
	-
	-LSYM(Ldivbyzero_negative):
	- mov curbit, ip
	- asrs curbit, curbit, #1
	- bcc LSYM(Ldiv0)
	- rsbs dividend, dividend, #0
	-.endm
	-
	-/* ------------------------------------------------------------------------ */
	-/* Start of the Real Functions */
	-/* ------------------------------------------------------------------------ */
	-
	- FUNC_START aeabi_idiv0
	- bx lr
	- FUNC_END aeabi_idiv0
	-
	- FUNC_START divsi3
	- FUNC_ALIAS aeabi_idiv divsi3
	-
	-LSYM(divsi3_skip_div0_test):
	- mov curbit, dividend
	- orrs curbit, divisor
	- bmi LSYM(Lthumb1_div_negative)
	-
	-LSYM(Lthumb1_div_positive):
	- THUMB1_Div_Positive
	-
	-LSYM(Lthumb1_div_negative):
	- THUMB1_Div_Negative
	-
	- DIV_FUNC_END divsi3 signed
	-
	- FUNC_START aeabi_idivmod
	-
	- cmp r1, #0
	- beq LSYM(Ldiv0)
	- push {r0, r1, lr}
	- bl LSYM(divsi3_skip_div0_test)
	- POP {r1, r2, r3}
	- mul r2, r0
	- sub r1, r1, r2
	- bx r3
	-
	- FUNC_END aeabi_idivmod
	-/* ------------------------------------------------------------------------ */
	diff --git a/contrib/cortex-strings/src/thumb/strcmp-armv6m.S b/contrib/cortex-strings/src/thumb/strcmp-armv6m.S
	deleted file mode 100644
	--- a/contrib/cortex-strings/src/thumb/strcmp-armv6m.S
	+++ /dev/null
	@@ -1,143 +0,0 @@
	-/*
	- * Copyright (c) 2014 ARM Ltd
	- * All rights reserved.
	- *
	- * Redistribution and use in source and binary forms, with or without
	- * modification, are permitted provided that the following conditions
	- * are met:
	- * 1. Redistributions of source code must retain the above copyright
	- * notice, this list of conditions and the following disclaimer.
	- * 2. Redistributions in binary form must reproduce the above copyright
	- * notice, this list of conditions and the following disclaimer in the
	- * documentation and/or other materials provided with the distribution.
	- * 3. The name of the company may not be used to endorse or promote
	- * products derived from this software without specific prior written
	- * permission.
	- *
	- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
	- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
	- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
	- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	- */
	-
	-/* Implementation of strcmp for ARMv6m. This version is only used in
	- ARMv6-M when we want an efficient implementation. Otherwize if the
	- code size is preferred, strcmp-armv4t.S will be used. */
	-
	- .thumb_func
	- .syntax unified
	- .arch armv6-m
	-
	- .macro DoSub n, label
	- subs r0, r0, r1
	-#ifdef __ARM_BIG_ENDIAN
	- lsrs r1, r4, \n
	-#else
	- lsls r1, r4, \n
	-#endif
	- orrs r1, r0
	- bne \label
	- .endm
	-
	- .macro Byte_Test n, label
	- lsrs r0, r2, \n
	- lsrs r1, r3, \n
	- DoSub \n, \label
	- .endm
	-
	- .text
	- .p2align 0
	- .global strcmp
	- .type strcmp, %function
	-strcmp:
	- .cfi_startproc
	- mov r2, r0
	- push {r4, r5, r6, lr}
	- orrs r2, r1
	- lsls r2, r2, #30
	- bne 6f
	- ldr r5, =0x01010101
	- lsls r6, r5, #7
	-1:
	- ldmia r0!, {r2}
	- ldmia r1!, {r3}
	- subs r4, r2, r5
	- bics r4, r2
	- ands r4, r6
	- beq 3f
	-
	-#ifdef __ARM_BIG_ENDIAN
	- Byte_Test #24, 4f
	- Byte_Test #16, 4f
	- Byte_Test #8, 4f
	-
	- b 7f
	-3:
	- cmp r2, r3
	- beq 1b
	- cmp r2, r3
	-#else
	- uxtb r0, r2
	- uxtb r1, r3
	- DoSub #24, 2f
	-
	- uxth r0, r2
	- uxth r1, r3
	- DoSub #16, 2f
	-
	- lsls r0, r2, #8
	- lsls r1, r3, #8
	- lsrs r0, r0, #8
	- lsrs r1, r1, #8
	- DoSub #8, 2f
	-
	- lsrs r0, r2, #24
	- lsrs r1, r3, #24
	- subs r0, r0, r1
	-2:
	- pop {r4, r5, r6, pc}
	-
	-3:
	- cmp r2, r3
	- beq 1b
	- rev r0, r2
	- rev r1, r3
	- cmp r0, r1
	-#endif
	-
	- bls 5f
	- movs r0, #1
	-4:
	- pop {r4, r5, r6, pc}
	-5:
	- movs r0, #0
	- mvns r0, r0
	- pop {r4, r5, r6, pc}
	-6:
	- ldrb r2, [r0, #0]
	- ldrb r3, [r1, #0]
	- adds r0, #1
	- adds r1, #1
	- cmp r2, #0
	- beq 7f
	- cmp r2, r3
	- bne 7f
	- ldrb r2, [r0, #0]
	- ldrb r3, [r1, #0]
	- adds r0, #1
	- adds r1, #1
	- cmp r2, #0
	- beq 7f
	- cmp r2, r3
	- beq 6b
	-7:
	- subs r0, r2, r3
	- pop {r4, r5, r6, pc}
	- .cfi_endproc
	- .size strcmp, . - strcmp

File Metadata

Mime Type: text/plain
Expires: Thu, Feb 6, 9:43 AM (20 h, 51 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 16491370
Default Alt Text: D48500.diff (237 KB)

D48500.diffNo OneTemporaryActions

D48500.diffView Options

File Metadata

Event Timeline

D48500.diff
No OneTemporary
Actions

D48500.diff
View Options