Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F115912119
D45670.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
37 KB
Referenced Files
None
Subscribers
None
D45670.diff
View Options
diff --git a/lib/libc/Makefile b/lib/libc/Makefile
--- a/lib/libc/Makefile
+++ b/lib/libc/Makefile
@@ -109,7 +109,7 @@
.include "${LIBC_SRCTOP}/inet/Makefile.inc"
.include "${LIBC_SRCTOP}/isc/Makefile.inc"
.include "${LIBC_SRCTOP}/locale/Makefile.inc"
-.include "${LIBC_SRCTOP}/md/Makefile.inc"
+.include "${SRCTOP}/lib/libmd/Makefile.md5.inc"
.include "${LIBC_SRCTOP}/nameser/Makefile.inc"
.include "${LIBC_SRCTOP}/net/Makefile.inc"
.include "${LIBC_SRCTOP}/nls/Makefile.inc"
diff --git a/lib/libc/md/Makefile.inc b/lib/libc/md/Makefile.inc
deleted file mode 100644
--- a/lib/libc/md/Makefile.inc
+++ /dev/null
@@ -1,3 +0,0 @@
-.PATH: ${SRCTOP}/sys/kern
-
-SRCS+= md5c.c
diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile
--- a/lib/libmd/Makefile
+++ b/lib/libmd/Makefile
@@ -6,7 +6,7 @@
PACKAGE= runtime
LIB= md
SHLIB_MAJOR= 7
-SRCS= md4c.c md5c.c md4hl.c md5hl.c \
+SRCS= md4c.c md4hl.c md5hl.c \
rmd160c.c rmd160hl.c \
sha0c.c sha0hl.c sha1c.c sha1hl.c \
sha224hl.c sha256c.c sha256hl.c \
@@ -150,6 +150,8 @@
.endif
.endif # ${USE_ASM_SOURCES} != 0
+.include "Makefile.md5.inc"
+
md4hl.c: mdXhl.c
(echo '#define LENGTH 16'; \
sed -e 's/mdX/md4/g' -e 's/MDX/MD4/g' ${.ALLSRC}) > ${.TARGET}
diff --git a/lib/libmd/Makefile.md5.inc b/lib/libmd/Makefile.md5.inc
new file mode 100644
--- /dev/null
+++ b/lib/libmd/Makefile.md5.inc
@@ -0,0 +1,17 @@
+# include the MD5 hash function into the build
+
+.PATH: ${SRCTOP}/sys/crypto/md5
+
+SRCS+= md5c.c
+
+USE_ASM_SOURCES?= 1
+
+.if ${USE_ASM_SOURCES} != 0 && !defined(BOOTSTRAPPING) && ${MK_MACHDEP_OPTIMIZATIONS} != no
+.if exists(md5block_${MACHINE_ARCH}.S)
+SRCS+= md5block_${MACHINE_ARCH}.S
+CFLAGS.md5c.c+= -DMD5_ASM
+.if exists(md5dispatch_${MACHINE_ARCH}.c)
+SRCS+= md5dispatch_${MACHINE_ARCH}.c
+.endif
+.endif
+.endif
diff --git a/stand/libsa/Makefile b/stand/libsa/Makefile
--- a/stand/libsa/Makefile
+++ b/stand/libsa/Makefile
@@ -189,9 +189,9 @@
.PATH: ${SYSDIR}/crypto/sha2
SRCS+= sha256c.c sha512c.c
-# md5 from the kernel
-.PATH: ${SYSDIR}/kern
-SRCS+= md5c.c
+# md5 from the kernel, but avoid SIMD implementations
+USE_ASM_SOURCES=0
+.include "${SRCTOP}/lib/libmd/Makefile.md5.inc"
.if ${DO32:U0} == 0
MAN=libsa.3
diff --git a/sys/conf/Makefile.amd64 b/sys/conf/Makefile.amd64
--- a/sys/conf/Makefile.amd64
+++ b/sys/conf/Makefile.amd64
@@ -35,6 +35,8 @@
CFLAGS+= -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer
.endif
+CFLAGS.md5c.c+= -DMD5_ASM
+
%BEFORE_DEPEND
%OBJS
diff --git a/sys/conf/Makefile.arm64 b/sys/conf/Makefile.arm64
--- a/sys/conf/Makefile.arm64
+++ b/sys/conf/Makefile.arm64
@@ -60,6 +60,8 @@
CFLAGS += -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer
.endif
+CFLAGS.md5c.c+= -DMD5_ASM
+
%BEFORE_DEPEND
%OBJS
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -697,6 +697,7 @@
compile-with "${NORMAL_C} -I$S/contrib/libsodium/src/libsodium/include -I$S/crypto/libsodium"
crypto/des/des_ecb.c optional netsmb
crypto/des/des_setkey.c optional netsmb
+crypto/md5/md5c.c standard
crypto/openssl/ossl.c optional ossl
crypto/openssl/ossl_aes.c optional ossl
crypto/openssl/ossl_chacha20.c optional ossl
@@ -3871,7 +3872,6 @@
kern/link_elf.c standard
kern/linker_if.m standard
kern/md4c.c optional netsmb
-kern/md5c.c standard
kern/p1003_1b.c standard
kern/posix4_mib.c standard
kern/sched_4bsd.c optional sched_4bsd
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -98,6 +98,8 @@
cddl/dev/dtrace/amd64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}"
crypto/aesni/aeskeys_amd64.S optional aesni
crypto/des/des_enc.c optional netsmb
+crypto/md5/md5block_amd64.S standard
+crypto/md5/md5dispatch_amd64.c standard
crypto/openssl/amd64/aes-gcm-avx512.S optional ossl
crypto/openssl/amd64/aesni-x86_64.S optional ossl
crypto/openssl/amd64/aesni-gcm-x86_64.S optional ossl
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -171,6 +171,7 @@
clean "ghashv8-armx.o"
crypto/des/des_enc.c optional netsmb
+crypto/md5/md5block_aarch64.S standard
crypto/openssl/ossl_aarch64.c optional ossl
crypto/openssl/aarch64/chacha-armv8.S optional ossl \
compile-with "${CC} -c ${CFLAGS:N-mgeneral-regs-only} -I$S/crypto/openssl ${WERROR} ${.IMPSRC}"
diff --git a/sys/crypto/md5/md5block_aarch64.S b/sys/crypto/md5/md5block_aarch64.S
new file mode 100644
--- /dev/null
+++ b/sys/crypto/md5/md5block_aarch64.S
@@ -0,0 +1,213 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/elf_common.h>
+#include <machine/asm.h>
+
+.macro round a, b, c, d, f, k, m, s
+ \f f, \b, \c, \d
+.if 0x100000000 - \k > 0x00ffffff
+ movz k, #\k & 0xffff
+ movk k, #\k >> 16, lsl #16
+ add k, k, \m // k[i] + m[g]
+.elseif 0x100000000 - \k > 0x0000ffff
+ sub k, \m, #(0x100000000 - \k) & 0xfff000
+ sub k, k, #(0x100000000 - \k) & 0xfff
+.else
+ movz k, #0x100000000 - \k
+ sub k, \m, k
+.endif
+ add \a, \a, k // k[i] + m[g] + a
+ add \a, \a, f // k[i] + m[g] + a + f
+ ror \a, \a, #32-\s
+ add \a, \a, \b
+.endm
+
+ /* f = b ? c : d */
+.macro f0 f, b, c, d
+ eor \f, \c, \d
+ and \f, \f, \b
+ eor \f, \f, \d
+.endm
+
+ /*
+ * special cased round 1 function
+ * f1 = d ? b : c = (d & b) + (~d & c)
+ */
+.macro round1 a, b, c, d, k, m, s
+ bic tmp, \c, \d // ~d & c
+.if 0x100000000 - \k > 0x00ffffff
+ movz k, #\k & 0xffff
+ movk k, #\k >> 16, lsl #16
+ add k, k, \m // k[i] + m[g]
+.elseif 0x100000000 - \k > 0x0000ffff
+ sub k, \m, #(0x100000000 - \k) & 0xfff000
+ sub k, k, #(0x100000000 - \k) & 0xfff
+.else
+ movz k, #0x100000000 - \k
+ sub k, \m, k
+.endif
+ add \a, \a, k // k[i] + m[g] + a
+ and f, \b, \d // d & b
+ add \a, \a, tmp // k[i] + m[g] + a + (~d & c)
+ add \a, \a, f // k[i] + m[g] + a + (~d & c) + (d & b)
+ ror \a, \a, #32-\s
+ add \a, \a, \b
+.endm
+
+ /* f = b ^ c ^ d */
+.macro f2 f, b, c, d
+ eor \f, \c, \d
+ eor \f, \f, \b
+.endm
+
+ /* f = c ^ (b | ~d) */
+.macro f3 f, b, c, d
+ orn \f, \b, \d
+ eor \f, \f, \c
+.endm
+
+ /* do 4 rounds */
+.macro rounds f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
+ round a, b, c, d, \f, \k0, \m0, \s0
+ round d, a, b, c, \f, \k1, \m1, \s1
+ round c, d, a, b, \f, \k2, \m2, \s2
+ round b, c, d, a, \f, \k3, \m3, \s3
+.endm
+
+ /* do 4 rounds with f0, f1, f2, f3 */
+.macro rounds0 m0, m1, m2, m3, k0, k1, k2, k3
+ rounds f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
+.endm
+
+.macro rounds1 m0, m1, m2, m3, k0, k1, k2, k3
+ round1 a, b, c, d, \k0, \m0, 5
+ round1 d, a, b, c, \k1, \m1, 9
+ round1 c, d, a, b, \k2, \m2, 14
+ round1 b, c, d, a, \k3, \m3, 20
+.endm
+
+.macro rounds2 m0, m1, m2, m3, k0, k1, k2, k3
+ rounds f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
+.endm
+
+.macro rounds3 m0, m1, m2, m3, k0, k1, k2, k3
+ rounds f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
+.endm
+
+ /* md5block(MD5_CTX, buf, len) */
+ENTRY(_libmd_md5block)
+ctx .req x0
+buf .req x1
+len .req x2
+end .req x2 // aliases len
+a .req w3
+b .req w4
+c .req w5
+d .req w6
+f .req w7
+tmp .req w8
+k .req w9
+m0 .req w10
+m1 .req w11
+m2 .req w12
+m3 .req w13
+m4 .req w14
+m5 .req w15
+m6 .req w16
+m7 .req w17
+ // x18 is the platform register
+m8 .req w19
+m9 .req w20
+m10 .req w21
+m11 .req w22
+m12 .req w23
+m13 .req w24
+m14 .req w25
+m15 .req w26
+
+a_ .req m0
+b_ .req m7
+c_ .req m14
+d_ .req m5
+
+ stp x19, x20, [sp, #-0x40]! // save callee-saved x19-x26, they back m8-m15
+ stp x21, x22, [sp, #0x10]
+ stp x23, x24, [sp, #0x20]
+ stp x25, x26, [sp, #0x30]
+
+ bics len, len, #63 // len &= ~63: whole blocks only, still in bytes; sets Z if none
+ add end, buf, len // end pointer (overwrites len; flags from BICS still live)
+
+ beq .Lend // was len == 0 after BICS?
+
+ ldp a, b, [ctx, #0] // first two state words; assumes state[] is at offset 0 of MD5_CTX
+ ldp c, d, [ctx, #8] // remaining two state words
+
+ /* first eight rounds interleaved with data loads */
+.Lloop: ldp m0, m1, [buf, #0]
+ round a, b, c, d, f0, 0xd76aa478, m0, 7
+ ldp m2, m3, [buf, #8]
+ round d, a, b, c, f0, 0xe8c7b756, m1, 12
+ ldp m4, m5, [buf, #16]
+ round c, d, a, b, f0, 0x242070db, m2, 17
+ ldp m6, m7, [buf, #24]
+ round b, c, d, a, f0, 0xc1bdceee, m3, 22
+
+ ldp m8, m9, [buf, #32]
+ round a, b, c, d, f0, 0xf57c0faf, m4, 7
+ ldp m10, m11, [buf, #40]
+ round d, a, b, c, f0, 0x4787c62a, m5, 12
+ ldp m12, m13, [buf, #48]
+ round c, d, a, b, f0, 0xa8304613, m6, 17
+ ldp m14, m15, [buf, #56]
+ round b, c, d, a, f0, 0xfd469501, m7, 22
+
+ /* remaining rounds use the roundsX macros */
+// rounds0 m0, m1, m2, m3, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee
+// rounds0 m4, m5, m6, m7, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501
+ rounds0 m8, m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
+ rounds0 m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+
+ rounds1 m1, m6, m11, m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
+ rounds1 m5, m10, m15, m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
+ rounds1 m9, m14, m3, m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
+ rounds1 m13, m2, m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+
+ rounds2 m5, m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
+ rounds2 m1, m4, m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
+ rounds2 m13, m0, m3, m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
+ rounds2 m9, m12, m15, m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+
+ rounds3 m0, m7, m14, m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
+ rounds3 m12, m3, m10, m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
+ rounds3 m8, m15, m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
+ rounds3 m4, m11, m2, m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+
+ ldp a_, b_, [ctx, #0]
+ ldp c_, d_, [ctx, #8]
+ add a, a, a_
+ add b, b, b_
+ add c, c, c_
+ add d, d, d_
+ stp a, b, [ctx, #0]
+ stp c, d, [ctx, #8]
+
+ add buf, buf, #64
+ cmp buf, end
+ bne .Lloop
+
+.Lend: ldp x25, x26, [sp, #0x30]
+ ldp x23, x24, [sp, #0x20]
+ ldp x21, x22, [sp, #0x10]
+ ldp x19, x20, [sp], #0x40
+
+ ret
+END(_libmd_md5block)
+
+GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
+
+ .section .note.GNU-stack,"",%progbits
diff --git a/sys/crypto/md5/md5block_amd64.S b/sys/crypto/md5/md5block_amd64.S
new file mode 100644
--- /dev/null
+++ b/sys/crypto/md5/md5block_amd64.S
@@ -0,0 +1,388 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <machine/asm.h>
+
+ // md5block(MD5_CTX, buf, len)
+ENTRY(_libmd_md5block_baseline)
+.macro round a, b, c, d, f, k, m, s
+ \f %ebp, \b, \c, \d
+ add $\k, \a // a + k[i]
+ add ((\m)%16*4)(%rsi), \a // a + k[i] + m[g]
+ add %ebp, \a // a + k[i] + m[g] + f
+ rol $\s, \a
+ add \b, \a
+.endm
+
+ // f = b ? c : d
+.macro f0 f, b, c, d
+ mov \c, \f
+ xor \d, \f
+ and \b, \f
+ xor \d, \f
+.endm
+
+ // f = d ? b : c
+.macro f1 f, b, c, d
+ mov \c, \f
+ xor \b, \f
+ and \d, \f
+ xor \c, \f
+.endm
+
+ // f = b ^ c ^ d
+.macro f2 f, b, c, d
+ mov \c, \f
+ xor \d, \f
+ xor \b, \f
+.endm
+
+ // f = c ^ (b | ~d)
+.macro f3 f, b, c, d
+ mov $-1, \f
+ xor \d, \f
+ or \b, \f
+ xor \c, \f
+.endm
+
+ // do 4 rounds
+.macro rounds f, p, q, s0, s1, s2, s3, k0, k1, k2, k3
+ round %eax, %ebx, %ecx, %edx, \f, \k0, \p*0+\q, \s0
+ round %edx, %eax, %ebx, %ecx, \f, \k1, \p*1+\q, \s1
+ round %ecx, %edx, %eax, %ebx, \f, \k2, \p*2+\q, \s2
+ round %ebx, %ecx, %edx, %eax, \f, \k3, \p*3+\q, \s3
+.endm
+
+ // do 4 rounds with f0, f1, f2, f3
+.macro rounds0 i, k0, k1, k2, k3
+ rounds f0, 1, \i, 7, 12, 17, 22, \k0, \k1, \k2, \k3
+.endm
+
+.macro rounds1 i, k0, k1, k2, k3
+ rounds f1, 5, 5*\i+1, 5, 9, 14, 20, \k0, \k1, \k2, \k3
+.endm
+
+.macro rounds2 i, k0, k1, k2, k3
+ rounds f2, 3, 3*\i+5, 4, 11, 16, 23, \k0, \k1, \k2, \k3
+.endm
+
+.macro rounds3 i, k0, k1, k2, k3
+ rounds f3, 7, 7*\i, 6, 10, 15, 21, \k0, \k1, \k2, \k3
+.endm
+
+ push %rbx // callee-saved, holds b
+ push %rbp // callee-saved, scratch for the round function value
+ push %r12 // callee-saved, holds the end pointer
+
+ and $~63, %rdx // round length down to whole 64 byte blocks (still in bytes)
+ lea (%rsi, %rdx, 1), %r12 // end pointer
+
+ mov (%rdi), %eax // a
+ mov 4(%rdi), %ebx // b
+ mov 8(%rdi), %ecx // c
+ mov 12(%rdi), %edx // d
+
+ cmp %rsi, %r12 // any data to process?
+ je .Lend // no: skip the loop and the state write-back
+
+ .balign 16
+.Lloop: mov %eax, %r8d
+ mov %ebx, %r9d
+ mov %ecx, %r10d
+ mov %edx, %r11d
+
+ rounds0 0, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee
+ rounds0 4, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501
+ rounds0 8, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
+ rounds0 12, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+
+ rounds1 16, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
+ rounds1 20, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
+ rounds1 24, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
+ rounds1 28, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+
+ rounds2 32, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
+ rounds2 36, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
+ rounds2 40, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
+ rounds2 44, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+
+ rounds3 48, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
+ rounds3 52, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
+ rounds3 56, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
+ rounds3 60, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+
+ add %r8d, %eax
+ add %r9d, %ebx
+ add %r10d, %ecx
+ add %r11d, %edx
+
+ add $64, %rsi
+ cmp %rsi, %r12
+ jne .Lloop
+
+ mov %eax, (%rdi)
+ mov %ebx, 4(%rdi)
+ mov %ecx, 8(%rdi)
+ mov %edx, 12(%rdi)
+
+.Lend: pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+END(_libmd_md5block_baseline)
+
+ /*
+ * An implementation leveraging the ANDN instruction
+ * from BMI1 to shorten some dependency chains.
+ */
+ENTRY(_libmd_md5block_bmi1)
+ // special-cased round 1
+ // f1 = d ? b : c = (d & b) + (~d & c)
+.macro round1 a, b, c, d, k, m, s
+ andn \c, \d, %edi // ~d & c
+ add $\k, \a // a + k[i]
+ mov \d, %ebp
+ add ((\m)%16*4)(%rsi), \a // a + k[i] + m[g]
+ and \b, %ebp // d & b
+ add %edi, \a // a + k[i] + m[g] + (~d & c)
+ add %ebp, \a // a + k[i] + m[g] + (~d & c) + (d & b)
+ rol $\s, \a
+ add \b, \a
+.endm
+
+ // special-cased round 3
+ // f3 = c ^ (b | ~d) = ~(c ^ ~b & d) = -1 - (c ^ ~b & d)
+.macro round3 a, b, c, d, k, m, s
+ andn \d, \b, %ebp // ~b & d
+ add $\k - 1, \a // a + k[i] - 1
+ add ((\m)%16*4)(%rsi), \a // a + k[i] - 1 + m[g]
+ xor \c, %ebp // c ^ (~b & d) = ~f
+ sub %ebp, \a // a + k[i] + m[g] + f, as f = -1 - ~f
+ rol $\s, \a
+ add \b, \a
+.endm
+
+ .purgem rounds1
+.macro rounds1 i, k0, k1, k2, k3
+ round1 %eax, %ebx, %ecx, %edx, \k0, 5*\i+ 1, 5
+ round1 %edx, %eax, %ebx, %ecx, \k1, 5*\i+ 6, 9
+ round1 %ecx, %edx, %eax, %ebx, \k2, 5*\i+11, 14
+ round1 %ebx, %ecx, %edx, %eax, \k3, 5*\i+16, 20
+.endm
+
+ .purgem rounds3
+.macro rounds3 i, k0, k1, k2, k3
+ round3 %eax, %ebx, %ecx, %edx, \k0, 7*\i+ 0, 6
+ round3 %edx, %eax, %ebx, %ecx, \k1, 7*\i+ 7, 10
+ round3 %ecx, %edx, %eax, %ebx, \k2, 7*\i+14, 15
+ round3 %ebx, %ecx, %edx, %eax, \k3, 7*\i+21, 21
+.endm
+
+ push %rbx // callee-saved, holds b
+ push %rbp // callee-saved, scratch in round1/round3
+ push %r12 // callee-saved, holds the end pointer
+
+ and $~63, %rdx // round length down to whole 64 byte blocks (still in bytes)
+ lea (%rsi, %rdx, 1), %r12 // end pointer
+
+ mov (%rdi), %eax // a
+ mov 4(%rdi), %ebx // b
+ mov 8(%rdi), %ecx // c
+ mov 12(%rdi), %edx // d
+
+ cmp %rsi, %r12 // any data to process?
+ je 0f // no: leave before ctx is pushed, so the pops stay balanced
+
+ push %rdi // round1 uses %edi as scratch; ctx is popped again after the loop
+
+ .balign 16
+1: mov %eax, %r8d
+ mov %ebx, %r9d
+ mov %ecx, %r10d
+ mov %edx, %r11d
+
+ rounds0 0, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee
+ rounds0 4, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501
+ rounds0 8, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
+ rounds0 12, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+
+ rounds1 16, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
+ rounds1 20, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
+ rounds1 24, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
+ rounds1 28, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+
+ rounds2 32, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
+ rounds2 36, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
+ rounds2 40, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
+ rounds2 44, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+
+ rounds3 48, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
+ rounds3 52, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
+ rounds3 56, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
+ rounds3 60, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+
+ add %r8d, %eax
+ add %r9d, %ebx
+ add %r10d, %ecx
+ add %r11d, %edx
+
+ add $64, %rsi
+ cmp %rsi, %r12
+ jne 1b
+
+ pop %rdi
+ mov %eax, (%rdi)
+ mov %ebx, 4(%rdi)
+ mov %ecx, 8(%rdi)
+ mov %edx, 12(%rdi)
+
+0: pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+END(_libmd_md5block_bmi1)
+
+#ifndef _KERNEL
+ /*
+ * An implementation leveraging AVX-512 for its VPTERNLOGD
+ * instruction. We're using only XMM registers here,
+ * avoiding costly thermal licensing.
+ */
+ENTRY(_libmd_md5block_avx512)
+.macro vround a, b, c, d, f, i, m, mi, s
+ vmovdqa \d, %xmm4
+ vpternlogd $\f, \b, \c, %xmm4
+ vpaddd 4*(\i)(%rax){1to4}, \m, %xmm5 // m[g] + k[i]
+.if \mi != 0
+ vpshufd $0x55 * \mi, %xmm5, %xmm5 // broadcast to each dword
+.endif
+ vpaddd %xmm5, \a, \a // a + k[i] + m[g]
+ vpaddd %xmm4, \a, \a // a + k[i] + m[g] + f
+ vprold $\s, \a, \a
+ vpaddd \b, \a, \a
+.endm
+
+.macro vrounds f, i, m0, i0, m1, i1, m2, i2, m3, i3, s0, s1, s2, s3
+ vround %xmm0, %xmm1, %xmm2, %xmm3, \f, \i+0, \m0, \i0, \s0
+ vround %xmm3, %xmm0, %xmm1, %xmm2, \f, \i+1, \m1, \i1, \s1
+ vround %xmm2, %xmm3, %xmm0, %xmm1, \f, \i+2, \m2, \i2, \s2
+ vround %xmm1, %xmm2, %xmm3, %xmm0, \f, \i+3, \m3, \i3, \s3
+.endm
+
+/*
+ * d c b f0 f1 f2 f3
+ * 0 0 0 0 0 0 1
+ * 0 0 1 0 0 1 1
+ * 0 1 0 0 1 1 0
+ * 0 1 1 1 1 0 0
+ * 1 0 0 1 0 1 0
+ * 1 0 1 0 1 0 1
+ * 1 1 0 1 0 0 1
+ * 1 1 1 1 1 1 0
+ */
+.macro vrounds0 i, m
+ vrounds 0xd8, \i, \m, 0, \m, 1, \m, 2, \m, 3, 7, 12, 17, 22
+.endm
+
+.macro vrounds1 i, m0, i0, m1, i1, m2, i2, m3, i3
+ vrounds 0xac, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 5, 9, 14, 20
+.endm
+
+.macro vrounds2 i, m0, i0, m1, i1, m2, i2, m3, i3
+ vrounds 0x96, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 4, 11, 16, 23
+.endm
+
+.macro vrounds3 i, m0, i0, m1, i1, m2, i2, m3, i3
+ vrounds 0x63, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 6, 10, 15, 21
+.endm
+
+ and $~63, %rdx // length in blocks
+ add %rsi, %rdx // end pointer
+
+ vmovd (%rdi), %xmm0 // a
+ vmovd 4(%rdi), %xmm1 // b
+ vmovd 8(%rdi), %xmm2 // c
+ vmovd 12(%rdi), %xmm3 // d
+
+ lea keys(%rip), %rax
+
+ cmp %rsi, %rdx // any data to process?
+ je 0f
+
+ .balign 16
+1: vmovdqu 0*4(%rsi), %xmm8 // message words m[0..3]
+ vmovdqu 4*4(%rsi), %xmm9 // m[4..7]
+ vmovdqu 8*4(%rsi), %xmm10 // m[8..11]
+ vmovdqu 12*4(%rsi), %xmm11 // m[12..15]
+
+ vmovdqa %xmm0, %xmm12 // stash old state variables
+ vmovdqa %xmm1, %xmm13
+ vmovdqa %xmm2, %xmm14
+ vmovdqa %xmm3, %xmm15
+
+ vrounds0 0, %xmm8
+ vrounds0 4, %xmm9
+ vrounds0 8, %xmm10
+ vrounds0 12, %xmm11
+
+ vrounds1 16, %xmm8, 1, %xmm9, 2, %xmm10, 3, %xmm8, 0
+ vrounds1 20, %xmm9, 1, %xmm10, 2, %xmm11, 3, %xmm9, 0
+ vrounds1 24, %xmm10, 1, %xmm11, 2, %xmm8, 3, %xmm10, 0
+ vrounds1 28, %xmm11, 1, %xmm8, 2, %xmm9, 3, %xmm11, 0
+
+ vrounds2 32, %xmm9, 1, %xmm10, 0, %xmm10, 3, %xmm11, 2
+ vrounds2 36, %xmm8, 1, %xmm9, 0, %xmm9, 3, %xmm10, 2
+ vrounds2 40, %xmm11, 1, %xmm8, 0, %xmm8, 3, %xmm9, 2
+ vrounds2 44, %xmm10, 1, %xmm11, 0, %xmm11, 3, %xmm8, 2
+
+ vrounds3 48, %xmm8, 0, %xmm9, 3, %xmm11, 2, %xmm9, 1
+ vrounds3 52, %xmm11, 0, %xmm8, 3, %xmm10, 2, %xmm8, 1
+ vrounds3 56, %xmm10, 0, %xmm11, 3, %xmm9, 2, %xmm11, 1
+ vrounds3 60, %xmm9, 0, %xmm10, 3, %xmm8, 2, %xmm10, 1
+
+ vpaddd %xmm12, %xmm0, %xmm0 // add the stashed state back in
+ vpaddd %xmm13, %xmm1, %xmm1
+ vpaddd %xmm14, %xmm2, %xmm2
+ vpaddd %xmm15, %xmm3, %xmm3
+
+ add $64, %rsi // advance to the next block
+ cmp %rsi, %rdx // reached the end pointer?
+ jne 1b
+
+ vmovd %xmm0, (%rdi)
+ vmovd %xmm1, 4(%rdi)
+ vmovd %xmm2, 8(%rdi)
+ vmovd %xmm3, 12(%rdi)
+
+0: ret
+END(_libmd_md5block_avx512)
+
+ // round keys, for use in md5block_avx512
+ .section .rodata
+ .balign 16
+keys: .4byte 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee
+ .4byte 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501
+ .4byte 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
+ .4byte 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+
+ .4byte 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
+ .4byte 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
+ .4byte 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
+ .4byte 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+
+ .4byte 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
+ .4byte 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
+ .4byte 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
+ .4byte 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+
+ .4byte 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
+ .4byte 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
+ .4byte 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
+ .4byte 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+ .size keys, .-keys
+#endif /* !defined(_KERNEL) */
+
+ .section .note.GNU-stack,"",%progbits
diff --git a/sys/crypto/md5/md5c.c b/sys/crypto/md5/md5c.c
new file mode 100644
--- /dev/null
+++ b/sys/crypto/md5/md5c.c
@@ -0,0 +1,234 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/endian.h>
+#include <sys/types.h>
+#include <sys/md5.h>
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/stdint.h>
+#include <sys/systm.h>
+#define assert(expr) MPASS(expr)
+#else
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#endif /* defined(_KERNEL) */
+
+#define md5block _libmd_md5block
+#ifdef MD5_ASM
+extern void md5block(MD5_CTX *, const void *, size_t);
+#else
+static void md5block(MD5_CTX *, const void *, size_t);
+#endif
+
+void
+MD5Init(MD5_CTX *ctx)
+{
+ ctx->state[0] = 0x67452301;
+ ctx->state[1] = 0xefcdab89;
+ ctx->state[2] = 0x98badcfe;
+ ctx->state[3] = 0x10325476;
+
+ ctx->count[0] = 0;
+ ctx->count[1] = 0;
+}
+
+void
+MD5Update(MD5_CTX *ctx, const void *data, unsigned int len)
+{
+ uint64_t nn; /* 64 bit running total assembled from count[0] and count[1] */
+ const char *p = data; /* read cursor into the caller's data */
+ unsigned num; /* number of bytes already sitting in ctx->buffer */
+
+ num = ctx->count[0] % MD5_BLOCK_LENGTH; /* count is used as a byte count here */
+ nn = (uint64_t)ctx->count[0] | (uint64_t)ctx->count[1] << 32;
+ nn += len; /* add in 64 bits so the carry reaches count[1] */
+ ctx->count[0] = (uint32_t)nn;
+ ctx->count[1] = (uint32_t)(nn >> 32);
+
+ if (num > 0) { /* phase 1: top up a partially filled buffer */
+ unsigned int n = MD5_BLOCK_LENGTH - num; /* free space left in the buffer */
+
+ if (n > len)
+ n = len; /* not enough input to complete the block */
+
+ memcpy((char *)ctx->buffer + num, p, n);
+ num += n;
+ if (num == MD5_BLOCK_LENGTH) /* buffer complete: hash it */
+ md5block(ctx, (void *)ctx->buffer, MD5_BLOCK_LENGTH);
+
+ p += n;
+ len -= n;
+ }
+
+ if (len >= MD5_BLOCK_LENGTH) { /* phase 2: hash whole blocks straight from the input */
+ unsigned n = len & ~(unsigned)(MD5_BLOCK_LENGTH - 1); /* len rounded down to whole blocks */
+
+ md5block(ctx, p, n);
+ p += n;
+ len -= n;
+ }
+
+ if (len > 0) /* phase 3: keep the tail (less than one block) for later */
+ memcpy((void *)ctx->buffer, p, len);
+}
+
+static void
+MD5Pad(MD5_CTX *ctx)
+{
+ uint64_t len; /* message length, read in bytes and converted to bits below */
+ unsigned t; /* number of padding bytes in front of the length field, 1..64 */
+ unsigned char tmp[MD5_BLOCK_LENGTH + sizeof(uint64_t)] = {0x80, 0}; /* 0x80 marker, rest zero */
+
+ len = (uint64_t)ctx->count[0] | (uint64_t)ctx->count[1] << 32;
+ t = 64 + 56 - ctx->count[0] % 64; /* pad so the length field ends on a block boundary */
+ if (t > 64)
+ t -= 64; /* the padding fits into the current block */
+
+ /* length in bits */
+ len <<= 3;
+ le64enc(tmp + t, len); /* little endian length right behind the padding */
+ MD5Update(ctx, tmp, t + 8); /* hashes the final block(s); this also advances count */
+ assert(ctx->count[0] % MD5_BLOCK_LENGTH == 0); /* nothing may be left buffered */
+}
+
+void
+MD5Final(unsigned char md[16], MD5_CTX *ctx)
+{
+ MD5Pad(ctx); /* append padding and length, hashing the last block(s) */
+
+ le32enc(md + 0, ctx->state[0]); /* digest is the state in little endian order */
+ le32enc(md + 4, ctx->state[1]);
+ le32enc(md + 8, ctx->state[2]);
+ le32enc(md + 12, ctx->state[3]);
+
+ explicit_bzero(ctx, sizeof(*ctx)); /* wipe the whole context, not just pointer-size bytes */
+}
+
+#ifndef MD5_ASM
+static const uint32_t K[64] = {
+ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+ 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+ 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+static inline uint32_t
+rol32(uint32_t a, int b)
+{
+ return (a << b | a >> (32 - b));
+}
+
+static void
+md5block(MD5_CTX *ctx, const void *data, size_t len)
+{
+ uint32_t m[16], a0, b0, c0, d0;
+ const char *p = data;
+
+ a0 = ctx->state[0];
+ b0 = ctx->state[1];
+ c0 = ctx->state[2];
+ d0 = ctx->state[3];
+
+ while (len >= MD5_BLOCK_LENGTH) {
+ size_t i;
+ uint32_t a = a0, b = b0, c = c0, d = d0, f, tmp;
+
+# pragma unroll
+ for (i = 0; i < 16; i++)
+ m[i] = le32dec(p + 4*i);
+
+# pragma unroll
+ for (i = 0; i < 16; i++) {
+ const int s[] = { 7, 12, 17, 22 };
+
+ f = d ^ (b & (c ^ d));
+ tmp = d;
+ d = c;
+ c = b;
+ b += rol32(a + f + K[i] + m[i], s[i % 4]);
+ a = tmp;
+ }
+
+# pragma unroll
+ for (; i < 32; i++) {
+ const int s[] = { 5, 9, 14, 20 };
+
+ f = c ^ (d & (b ^ c));
+ tmp = d;
+ d = c;
+ c = b;
+ b += rol32(a + f + K[i] + m[(5*i + 1) % 16], s[i % 4]);
+ a = tmp;
+ }
+
+# pragma unroll
+ for (; i < 48; i++) {
+ const int s[] = { 4, 11, 16, 23 };
+
+ f = b ^ c ^ d;
+ tmp = d;
+ d = c;
+ c = b;
+ b += rol32(a + f + K[i] + m[(3*i + 5) % 16], s[i % 4]);
+ a = tmp;
+ }
+
+# pragma unroll
+ for (; i < 64; i++) {
+ const int s[] = { 6, 10, 15, 21 };
+
+ f = c ^ (b | ~d);
+ tmp = d;
+ d = c;
+ c = b;
+ b += rol32(a + f + K[i] + m[7*i % 16], s[i % 4]);
+ a = tmp;
+ }
+
+ a0 += a;
+ b0 += b;
+ c0 += c;
+ d0 += d;
+
+ p += MD5_BLOCK_LENGTH;
+ len -= MD5_BLOCK_LENGTH;
+ }
+
+ ctx->state[0] = a0;
+ ctx->state[1] = b0;
+ ctx->state[2] = c0;
+ ctx->state[3] = d0;
+}
+#endif /* !defined(MD5_ASM) */
+
+#ifdef WEAK_REFS
+/* When building libmd, provide weak references. Note: this is not
+ activated in the context of compiling these sources for internal
+ use in libcrypt.
+ */
+#undef MD5Init
+__weak_reference(_libmd_MD5Init, MD5Init);
+#undef MD5Update
+__weak_reference(_libmd_MD5Update, MD5Update);
+#undef MD5Final
+__weak_reference(_libmd_MD5Final, MD5Final);
+#endif
diff --git a/sys/crypto/md5/md5dispatch_amd64.c b/sys/crypto/md5/md5dispatch_amd64.c
new file mode 100644
--- /dev/null
+++ b/sys/crypto/md5/md5dispatch_amd64.c
@@ -0,0 +1,33 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <machine/specialreg.h>
+#include <sys/types.h>
+#include <sys/md5.h>
+#include <x86/ifunc.h>
+
+extern void _libmd_md5block_baseline(MD5_CTX *, const void *, size_t);
+extern void _libmd_md5block_bmi1(MD5_CTX *, const void *, size_t);
+extern void _libmd_md5block_avx512(MD5_CTX *, const void *, size_t);
+
+DEFINE_UIFUNC(, void, _libmd_md5block, (MD5_CTX *, const void *, size_t))
+{
+ /*
+ * AVX-512 would need to be turned on first in the kernel
+ * and that's too expensive; the BMI1 kernel is plenty fast
+ * and doesn't require any special registers to run.
+ */
+#ifndef _KERNEL
+ if ((cpu_stdext_feature & (CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL))
+ == (CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL))
+ return (_libmd_md5block_avx512);
+#endif
+
+ if (cpu_stdext_feature & CPUID_STDEXT_BMI1)
+ return (_libmd_md5block_bmi1);
+ else
+ return (_libmd_md5block_baseline);
+}
diff --git a/sys/kern/md5c.c b/sys/kern/md5c.c
deleted file mode 100644
--- a/sys/kern/md5c.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*-
- * SPDX-License-Identifier: RSA-MD
- *
- * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
- *
- * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
- * rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * This code is the same as the code published by RSA Inc. It has been
- * edited for clarity and style only.
- */
-
-#include <sys/types.h>
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-#else
-#include <string.h>
-#endif
-
-#include <machine/endian.h>
-#include <sys/endian.h>
-#include <sys/md5.h>
-
-static void MD5Transform(uint32_t [4], const unsigned char [64]);
-
-#if (BYTE_ORDER == LITTLE_ENDIAN)
-#define Encode memcpy
-#define Decode memcpy
-#else
-
-/*
- * Encodes input (uint32_t) into output (unsigned char). Assumes len is
- * a multiple of 4.
- */
-
-static void
-Encode (unsigned char *output, uint32_t *input, unsigned int len)
-{
- unsigned int i;
- uint32_t ip;
-
- for (i = 0; i < len / 4; i++) {
- ip = input[i];
- *output++ = ip;
- *output++ = ip >> 8;
- *output++ = ip >> 16;
- *output++ = ip >> 24;
- }
-}
-
-/*
- * Decodes input (unsigned char) into output (uint32_t). Assumes len is
- * a multiple of 4.
- */
-
-static void
-Decode (uint32_t *output, const unsigned char *input, unsigned int len)
-{
- unsigned int i;
-
- for (i = 0; i < len; i += 4) {
- *output++ = input[i] | (input[i+1] << 8) | (input[i+2] << 16) |
- (input[i+3] << 24);
- }
-}
-#endif
-
-static unsigned char PADDING[64] = {
- 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* F, G, H and I are basic MD5 functions. */
-#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
-#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-#define I(x, y, z) ((y) ^ ((x) | (~z)))
-
-/* ROTATE_LEFT rotates x left n bits. */
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
-
-/*
- * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
- * Rotation is separate from addition to prevent recomputation.
- */
-#define FF(a, b, c, d, x, s, ac) { \
- (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \
- (a) = ROTATE_LEFT ((a), (s)); \
- (a) += (b); \
- }
-#define GG(a, b, c, d, x, s, ac) { \
- (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \
- (a) = ROTATE_LEFT ((a), (s)); \
- (a) += (b); \
- }
-#define HH(a, b, c, d, x, s, ac) { \
- (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \
- (a) = ROTATE_LEFT ((a), (s)); \
- (a) += (b); \
- }
-#define II(a, b, c, d, x, s, ac) { \
- (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \
- (a) = ROTATE_LEFT ((a), (s)); \
- (a) += (b); \
- }
-
-/* MD5 initialization. Begins an MD5 operation, writing a new context. */
-
-void
-MD5Init(MD5_CTX *context)
-{
-
- context->count[0] = context->count[1] = 0;
-
- /* Load magic initialization constants. */
- context->state[0] = 0x67452301;
- context->state[1] = 0xefcdab89;
- context->state[2] = 0x98badcfe;
- context->state[3] = 0x10325476;
-}
-
-/*
- * MD5 block update operation. Continues an MD5 message-digest
- * operation, processing another message block, and updating the
- * context.
- */
-
-void
-MD5Update(MD5_CTX *context, const void *in, unsigned int inputLen)
-{
- unsigned int i, index, partLen;
- const unsigned char *input = in;
-
- /* Compute number of bytes mod 64 */
- index = (unsigned int)((context->count[0] >> 3) & 0x3F);
-
- /* Update number of bits */
- if ((context->count[0] += ((uint32_t)inputLen << 3))
- < ((uint32_t)inputLen << 3))
- context->count[1]++;
- context->count[1] += ((uint32_t)inputLen >> 29);
-
- partLen = 64 - index;
-
- /* Transform as many times as possible. */
- if (inputLen >= partLen) {
- memcpy((void *)&context->buffer[index], (const void *)input,
- partLen);
- MD5Transform (context->state, context->buffer);
-
- for (i = partLen; i + 63 < inputLen; i += 64)
- MD5Transform (context->state, &input[i]);
-
- index = 0;
- }
- else
- i = 0;
-
- /* Buffer remaining input */
- memcpy ((void *)&context->buffer[index], (const void *)&input[i],
- inputLen-i);
-}
-
-/*
- * MD5 padding. Adds padding followed by original length.
- */
-
-static void
-MD5Pad(MD5_CTX *context)
-{
- unsigned char bits[8];
- unsigned int index, padLen;
-
- /* Save number of bits */
- Encode (bits, context->count, 8);
-
- /* Pad out to 56 mod 64. */
- index = (unsigned int)((context->count[0] >> 3) & 0x3f);
- padLen = (index < 56) ? (56 - index) : (120 - index);
- MD5Update (context, PADDING, padLen);
-
- /* Append length (before padding) */
- MD5Update (context, bits, 8);
-}
-
-/*
- * MD5 finalization. Ends an MD5 message-digest operation, writing the
- * the message digest and zeroizing the context.
- */
-
-void
-MD5Final(unsigned char digest[static MD5_DIGEST_LENGTH], MD5_CTX *context)
-{
- /* Do padding. */
- MD5Pad (context);
-
- /* Store state in digest */
- Encode (digest, context->state, MD5_DIGEST_LENGTH);
-
- /* Zeroize sensitive information. */
- explicit_bzero (context, sizeof (*context));
-}
-
-/* MD5 basic transformation. Transforms state based on block. */
-
-static void
-MD5Transform(uint32_t state[4], const unsigned char block[64])
-{
- uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
-
- Decode (x, block, 64);
-
- /* Round 1 */
-#define S11 7
-#define S12 12
-#define S13 17
-#define S14 22
- FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
- FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
- FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
- FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
- FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
- FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
- FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
- FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
- FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
- FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
- FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
- FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
- FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
- FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
- FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
- FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
-
- /* Round 2 */
-#define S21 5
-#define S22 9
-#define S23 14
-#define S24 20
- GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
- GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
- GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
- GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
- GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
- GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
- GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
- GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
- GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
- GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
- GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
- GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
- GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
- GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
- GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
- GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
-
- /* Round 3 */
-#define S31 4
-#define S32 11
-#define S33 16
-#define S34 23
- HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
- HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
- HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
- HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
- HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
- HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
- HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
- HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
- HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
- HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
- HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
- HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
- HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
- HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
- HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
- HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
-
- /* Round 4 */
-#define S41 6
-#define S42 10
-#define S43 15
-#define S44 21
- II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
- II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
- II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
- II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
- II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
- II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
- II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
- II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
- II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
- II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
- II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
- II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
- II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
- II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
- II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
- II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
-
- state[0] += a;
- state[1] += b;
- state[2] += c;
- state[3] += d;
-
- /* Zeroize sensitive information. */
- memset ((void *)x, 0, sizeof (x));
-}
-
-#ifdef WEAK_REFS
-/* When building libmd, provide weak references. Note: this is not
- activated in the context of compiling these sources for internal
- use in libcrypt.
- */
-#undef MD5Init
-__weak_reference(_libmd_MD5Init, MD5Init);
-#undef MD5Update
-__weak_reference(_libmd_MD5Update, MD5Update);
-#undef MD5Final
-__weak_reference(_libmd_MD5Final, MD5Final);
-#endif
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, May 1, 8:05 AM (13 h, 38 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17877044
Default Alt Text
D45670.diff (37 KB)
Attached To
Mode
D45670: lib/libmd: reimplement and enhance md5
Attached
Detach File
Event Timeline
Log In to Comment