D44274.diff

diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -107,7 +107,8 @@
crypto/openssl/amd64/sha1-x86_64.S optional ossl
crypto/openssl/amd64/sha256-x86_64.S optional ossl
crypto/openssl/amd64/sha512-x86_64.S optional ossl
-crypto/openssl/amd64/ossl_aes_gcm.c optional ossl
+crypto/openssl/amd64/ossl_aes_gcm_avx512.c optional ossl
+crypto/openssl/ossl_aes_gcm.c optional ossl
dev/amdgpio/amdgpio.c optional amdgpio
dev/axgbe/if_axgbe_pci.c optional axp
dev/axgbe/xgbe-desc.c optional axp
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -132,7 +132,7 @@
libkern/umoddi3.c standard
crypto/openssl/ossl_arm.c optional ossl
-crypto/openssl/arm/ossl_aes_gcm.c optional ossl
+crypto/openssl/arm/ossl_aes_gcm_neon.c optional ossl
crypto/openssl/arm/aes-armv4.S optional ossl \
compile-with "${NORMAL_C} -I${SRCTOP}/sys/crypto/openssl"
crypto/openssl/arm/bsaes-armv7.S optional ossl \
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -22,9 +22,11 @@
# openssl ppc common files
crypto/openssl/ossl_ppc.c optional ossl powerpc64 | ossl powerpc64le
+crypto/openssl/ossl_aes_gcm.c optional ossl powerpc64 | ossl powerpc64le
# openssl assembly files (powerpc64le)
crypto/openssl/powerpc64le/aes-ppc.S optional ossl powerpc64le
+crypto/openssl/powerpc64le/aes-gcm-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/aesp8-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/chacha-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/ecp_nistz256-ppc64.S optional ossl powerpc64le
@@ -45,6 +47,7 @@
# openssl assembly files (powerpc64)
crypto/openssl/powerpc64/aes-ppc.S optional ossl powerpc64
+crypto/openssl/powerpc64/aes-gcm-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/aesp8-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/chacha-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/ecp_nistz256-ppc64.S optional ossl powerpc64
diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c
new file mode 100644
--- /dev/null
+++ b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2021, Intel Corporation. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * This file contains an AES-GCM wrapper implementation from OpenSSL, using
+ * VAES extensions. It was ported from cipher_aes_gcm_hw_vaes_avx512.inc.
+ */
+
+#include <sys/endian.h>
+#include <sys/systm.h>
+
+#include <crypto/openssl/ossl.h>
+#include <crypto/openssl/ossl_aes_gcm.h>
+#include <crypto/openssl/ossl_cipher.h>
+
+#include <opencrypto/cryptodev.h>
+
+_Static_assert(
+ sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
+ "ossl_gcm_context too large");
+
+void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
+
+static void
+gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+{
+ KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
+ ("%s: invalid key length %zu", __func__, keylen));
+
+ memset(&ctx->gcm, 0, sizeof(ctx->gcm));
+ memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
+ aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
+ ctx->ops->init(ctx, key, keylen);
+}
+
+static void
+gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
+{
+ (void)ctx->ops->finish(ctx, NULL, 0);
+ memcpy(tag, ctx->gcm.Xi.c, len);
+}
+
+void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
+void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
+void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
+ const unsigned char *iv, size_t ivlen);
+void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
+ size_t len);
+void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
+ unsigned int *pblocklen, const unsigned char *in, size_t len,
+ unsigned char *out);
+void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
+ unsigned int *pblocklen, const unsigned char *in, size_t len,
+ unsigned char *out);
+void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
+
+static void
+gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+{
+ ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
+}
+
+static void
+gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
+ size_t len)
+{
+ KASSERT(len == AES_GCM_IV_LEN,
+ ("%s: invalid IV length %zu", __func__, len));
+
+ ctx->gcm.Yi.u[0] = 0; /* Current counter */
+ ctx->gcm.Yi.u[1] = 0;
+ ctx->gcm.Xi.u[0] = 0; /* AAD hash */
+ ctx->gcm.Xi.u[1] = 0;
+ ctx->gcm.len.u[0] = 0; /* AAD length */
+ ctx->gcm.len.u[1] = 0; /* Message length */
+ ctx->gcm.ares = 0;
+ ctx->gcm.mres = 0;
+
+ ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
+}
+
+static int
+gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
+ size_t len)
+{
+ uint64_t alen = ctx->gcm.len.u[0];
+ size_t lenblks;
+ unsigned int ares;
+
+ /* Bad sequence: call of AAD update after message processing */
+ if (ctx->gcm.len.u[1])
+ return -2;
+
+ alen += len;
+ /* AAD is limited by 2^64 bits, thus 2^61 bytes */
+ if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
+ return -1;
+ ctx->gcm.len.u[0] = alen;
+
+ ares = ctx->gcm.ares;
+ /* Partial AAD block left from previous AAD update calls */
+ if (ares > 0) {
+ /*
+ * Fill partial block buffer till full block
+ * (note, the hash is stored reflected)
+ */
+ while (ares > 0 && len > 0) {
+ ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
+ --len;
+ ares = (ares + 1) % AES_BLOCK_LEN;
+ }
+ /* Full block gathered */
+ if (ares == 0) {
+ ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
+ } else { /* no more AAD */
+ ctx->gcm.ares = ares;
+ return 0;
+ }
+ }
+
+ /* Bulk AAD processing */
+ lenblks = len & ((size_t)(-AES_BLOCK_LEN));
+ if (lenblks > 0) {
+ ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
+ aad += lenblks;
+ len -= lenblks;
+ }
+
+ /* Add remaining AAD to the hash (note, the hash is stored reflected) */
+ if (len > 0) {
+ ares = (unsigned int)len;
+ for (size_t i = 0; i < len; ++i)
+ ctx->gcm.Xi.c[15 - i] ^= aad[i];
+ }
+
+ ctx->gcm.ares = ares;
+
+ return 0;
+}
+
+static int
+_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len, bool encrypt)
+{
+ uint64_t mlen = ctx->gcm.len.u[1];
+
+ mlen += len;
+ if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
+ return -1;
+
+ ctx->gcm.len.u[1] = mlen;
+
+ /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
+ if (ctx->gcm.ares > 0) {
+ ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
+ ctx->gcm.ares = 0;
+ }
+
+ if (encrypt) {
+ ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
+ in, len, out);
+ } else {
+ ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
+ in, len, out);
+ }
+
+ return 0;
+}
+
+static int
+gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len)
+{
+ return _gcm_encrypt_avx512(ctx, in, out, len, true);
+}
+
+static int
+gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len)
+{
+ return _gcm_encrypt_avx512(ctx, in, out, len, false);
+}
+
+static int
+gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
+ size_t len)
+{
+ unsigned int *res = &ctx->gcm.mres;
+
+ /* Finalize AAD processing */
+ if (ctx->gcm.ares > 0)
+ res = &ctx->gcm.ares;
+
+ ossl_aes_gcm_finalize_avx512(ctx, *res);
+
+ ctx->gcm.ares = ctx->gcm.mres = 0;
+
+ if (tag != NULL)
+ return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
+ return 0;
+}
+
+static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
+ .init = gcm_init_avx512,
+ .setiv = gcm_setiv_avx512,
+ .aad = gcm_aad_avx512,
+ .encrypt = gcm_encrypt_avx512,
+ .decrypt = gcm_decrypt_avx512,
+ .finish = gcm_finish_avx512,
+ .tag = gcm_tag,
+};
+
+int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);
+
+int
+ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
+ void *_ctx)
+{
+ struct ossl_gcm_context *ctx;
+
+ ctx = _ctx;
+ ctx->ops = &gcm_ops_avx512;
+ gcm_init(ctx, key, klen);
+ return (0);
+}
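
For orientation, here is a hypothetical caller (not part of D44274) showing how the gcm_ops_avx512 table installed by ossl_aes_gcm_setkey_avx512() above is driven. struct ossl_gcm_context, AES_GCM_IV_LEN and the ops members come from ossl_aes_gcm.h in this tree; example_seal() and its buffers are invented purely for illustration. Note that the key length is passed in bits, matching the KASSERT in gcm_init().

	/*
	 * Hypothetical usage sketch, not part of this patch: drive the
	 * AVX-512 AES-GCM backend through the ops table for one request.
	 */
	static int
	example_seal(const uint8_t key[32], const uint8_t iv[12],
	    const uint8_t *aad, size_t aadlen,
	    const uint8_t *pt, uint8_t *ct, size_t ptlen, uint8_t tag[16])
	{
		struct ossl_gcm_context ctx;
		int error;

		/* Key length in bits; this also selects gcm_ops_avx512. */
		error = ossl_aes_gcm_setkey_avx512(key, 256, &ctx);
		if (error != 0)
			return (error);

		ctx.ops->setiv(&ctx, iv, AES_GCM_IV_LEN);
		error = ctx.ops->aad(&ctx, aad, aadlen);
		if (error != 0)
			return (error);
		error = ctx.ops->encrypt(&ctx, pt, ct, ptlen);
		if (error != 0)
			return (error);
		ctx.ops->tag(&ctx, tag, 16);	/* finalize and copy GHASH tag */
		return (0);
	}

On the decrypt side, ctx.ops->finish() would instead be called with the received tag, which gcm_finish_avx512() compares using timingsafe_bcmp().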
diff --git a/sys/crypto/openssl/arm/ossl_aes_gcm.c b/sys/crypto/openssl/arm/ossl_aes_gcm_neon.c
rename from sys/crypto/openssl/arm/ossl_aes_gcm.c
rename to sys/crypto/openssl/arm/ossl_aes_gcm_neon.c
diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm.c b/sys/crypto/openssl/ossl_aes_gcm.c
rename from sys/crypto/openssl/amd64/ossl_aes_gcm.c
rename to sys/crypto/openssl/ossl_aes_gcm.c
--- a/sys/crypto/openssl/amd64/ossl_aes_gcm.c
+++ b/sys/crypto/openssl/ossl_aes_gcm.c
@@ -1,6 +1,7 @@
/*
* Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2021, Intel Corporation. All Rights Reserved.
+ * Copyright (c) 2023, Raptor Engineering, LLC. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -9,11 +10,10 @@
*/
/*
- * This file contains 2 AES-GCM wrapper implementations from OpenSSL, using
- * AES-NI and VAES extensions respectively. These were ported from
- * cipher_aes_gcm_hw_aesni.inc and cipher_aes_gcm_hw_vaes_avx512.inc. The
- * AES-NI implementation makes use of a generic C implementation for partial
- * blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
+ * This file contains an AES-GCM wrapper implementation from OpenSSL, using
+ * AES-NI (x86) or POWER8 Crypto Extensions (ppc). It was ported from
+ * cipher_aes_gcm_hw_aesni.inc and it makes use of a generic C implementation
+ * for partial blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
*/
#include <sys/endian.h>
@@ -29,225 +29,152 @@
sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
"ossl_gcm_context too large");
-void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
-
-static void
-gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
-{
- KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
- ("%s: invalid key length %zu", __func__, keylen));
-
- memset(&ctx->gcm, 0, sizeof(ctx->gcm));
- memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
- aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
- ctx->ops->init(ctx, key, keylen);
-}
-
-static void
-gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
-{
- (void)ctx->ops->finish(ctx, NULL, 0);
- memcpy(tag, ctx->gcm.Xi.c, len);
-}
+#if defined(__amd64__) || defined(__i386__)
+#define AES_set_encrypt_key aesni_set_encrypt_key
+#define AES_gcm_encrypt aesni_gcm_encrypt
+#define AES_gcm_decrypt aesni_gcm_decrypt
+#define AES_encrypt aesni_encrypt
+#define AES_ctr32_encrypt_blocks aesni_ctr32_encrypt_blocks
+#define GCM_init gcm_init_avx
+#define GCM_gmult gcm_gmult_avx
+#define GCM_ghash gcm_ghash_avx
+
+void AES_set_encrypt_key(const void *key, int bits, void *ctx);
+size_t AES_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+size_t AES_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks);
+void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, void *ks, const unsigned char *iv);
-void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
-void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
-void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
- const unsigned char *iv, size_t ivlen);
-void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
+void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]);
+void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]);
+void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
size_t len);
-void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
- unsigned int *pblocklen, const unsigned char *in, size_t len,
- unsigned char *out);
-void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
- unsigned int *pblocklen, const unsigned char *in, size_t len,
- unsigned char *out);
-void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
-
-static void
-gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
-{
- ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
-}
-static void
-gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
- size_t len)
-{
- KASSERT(len == AES_GCM_IV_LEN,
- ("%s: invalid IV length %zu", __func__, len));
+#elif defined(__powerpc64__)
+#define AES_set_encrypt_key aes_p8_set_encrypt_key
+#define AES_gcm_encrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,1)
+#define AES_gcm_decrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,0)
+#define AES_encrypt aes_p8_encrypt
+#define AES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
+#define GCM_init gcm_init_p8
+#define GCM_gmult gcm_gmult_p8
+#define GCM_ghash gcm_ghash_p8
+
+size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
- ctx->gcm.Yi.u[0] = 0; /* Current counter */
- ctx->gcm.Yi.u[1] = 0;
- ctx->gcm.Xi.u[0] = 0; /* AAD hash */
- ctx->gcm.Xi.u[1] = 0;
- ctx->gcm.len.u[0] = 0; /* AAD length */
- ctx->gcm.len.u[1] = 0; /* Message length */
- ctx->gcm.ares = 0;
- ctx->gcm.mres = 0;
+void AES_set_encrypt_key(const void *key, int bits, void *ctx);
+void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks);
+void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, void *ks, const unsigned char *iv);
- ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
-}
+void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]);
+void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]);
+void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
+ size_t len);
-static int
-gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
- size_t len)
+static size_t
+ppc_aes_gcm_crypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key, unsigned char ivec_[16], uint64_t *Xi,
+ int encrypt)
{
- uint64_t alen = ctx->gcm.len.u[0];
- size_t lenblks;
- unsigned int ares;
-
- /* Bad sequence: call of AAD update after message processing */
- if (ctx->gcm.len.u[1])
- return -2;
-
- alen += len;
- /* AAD is limited by 2^64 bits, thus 2^61 bytes */
- if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
- return -1;
- ctx->gcm.len.u[0] = alen;
+ union {
+ uint32_t d[4];
+ uint8_t c[16];
+ } *ivec = (void *)ivec_;
+ int s = 0;
+ int ndone = 0;
+ int ctr_reset = 0;
+ uint32_t ivec_val;
+ uint64_t blocks_unused;
+ uint64_t nb = len / 16;
+ uint64_t next_ctr = 0;
+ unsigned char ctr_saved[12];
+
+ memcpy(ctr_saved, ivec, 12);
+
+ while (nb) {
+ ivec_val = ivec->d[3];
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ivec_val = bswap32(ivec_val);
+#endif
- ares = ctx->gcm.ares;
- /* Partial AAD block left from previous AAD update calls */
- if (ares > 0) {
- /*
- * Fill partial block buffer till full block
- * (note, the hash is stored reflected)
- */
- while (ares > 0 && len > 0) {
- ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
- --len;
- ares = (ares + 1) % AES_BLOCK_LEN;
- }
- /* Full block gathered */
- if (ares == 0) {
- ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
- } else { /* no more AAD */
- ctx->gcm.ares = ares;
- return 0;
+ blocks_unused = (uint64_t) 0xffffffffU + 1 - (uint64_t)ivec_val;
+ if (nb > blocks_unused) {
+ len = blocks_unused * 16;
+ nb -= blocks_unused;
+ next_ctr = blocks_unused;
+ ctr_reset = 1;
+ } else {
+ len = nb * 16;
+ next_ctr = nb;
+ nb = 0;
}
- }
- /* Bulk AAD processing */
- lenblks = len & ((size_t)(-AES_BLOCK_LEN));
- if (lenblks > 0) {
- ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
- aad += lenblks;
- len -= lenblks;
- }
+ s = encrypt ? ppc_aes_gcm_encrypt(in, out, len, key, ivec->c, Xi) :
+ ppc_aes_gcm_decrypt(in, out, len, key, ivec->c, Xi);
- /* Add remaining AAD to the hash (note, the hash is stored reflected) */
- if (len > 0) {
- ares = (unsigned int)len;
- for (size_t i = 0; i < len; ++i)
- ctx->gcm.Xi.c[15 - i] ^= aad[i];
+ /* add counter to ivec */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ivec->d[3] = bswap32(ivec_val + next_ctr);
+#else
+ ivec->d[3] += next_ctr;
+#endif
+ if (ctr_reset) {
+ ctr_reset = 0;
+ in += len;
+ out += len;
+ }
+ memcpy(ivec, ctr_saved, 12);
+ ndone += s;
}
- ctx->gcm.ares = ares;
-
- return 0;
+ return ndone;
}
-static int
-_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len, bool encrypt)
-{
- uint64_t mlen = ctx->gcm.len.u[1];
-
- mlen += len;
- if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
- return -1;
-
- ctx->gcm.len.u[1] = mlen;
-
- /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
- if (ctx->gcm.ares > 0) {
- ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
- ctx->gcm.ares = 0;
- }
-
- if (encrypt) {
- ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
- in, len, out);
- } else {
- ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
- in, len, out);
- }
-
- return 0;
-}
+#else
+#error "Unsupported architecture!"
+#endif
-static int
-gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len)
+static void
+gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
- return _gcm_encrypt_avx512(ctx, in, out, len, true);
-}
+ KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
+ ("%s: invalid key length %zu", __func__, keylen));
-static int
-gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len)
-{
- return _gcm_encrypt_avx512(ctx, in, out, len, false);
+ memset(&ctx->gcm, 0, sizeof(ctx->gcm));
+ memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
+ AES_set_encrypt_key(key, keylen, &ctx->aes_ks);
+ ctx->ops->init(ctx, key, keylen);
}
-static int
-gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
- size_t len)
+static void
+gcm_tag_op(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
{
- unsigned int *res = &ctx->gcm.mres;
-
- /* Finalize AAD processing */
- if (ctx->gcm.ares > 0)
- res = &ctx->gcm.ares;
-
- ossl_aes_gcm_finalize_avx512(ctx, *res);
-
- ctx->gcm.ares = ctx->gcm.mres = 0;
-
- if (tag != NULL)
- return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
- return 0;
+ (void)ctx->ops->finish(ctx, NULL, 0);
+ memcpy(tag, ctx->gcm.Xi.c, len);
}
-static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
- .init = gcm_init_avx512,
- .setiv = gcm_setiv_avx512,
- .aad = gcm_aad_avx512,
- .encrypt = gcm_encrypt_avx512,
- .decrypt = gcm_decrypt_avx512,
- .finish = gcm_finish_avx512,
- .tag = gcm_tag,
-};
-
-size_t aesni_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16], uint64_t *Xi);
-size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16], uint64_t *Xi);
-void aesni_encrypt(const unsigned char *in, unsigned char *out, void *ks);
-void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
- size_t blocks, void *ks, const unsigned char *iv);
-
-void gcm_init_avx(__uint128_t Htable[16], uint64_t Xi[2]);
-void gcm_gmult_avx(uint64_t Xi[2], const __uint128_t Htable[16]);
-void gcm_ghash_avx(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
- size_t len);
-
static void
-gcm_init_aesni(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+gcm_init_op(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
- aesni_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.H.u[0] = bswap64(ctx->gcm.H.u[0]);
ctx->gcm.H.u[1] = bswap64(ctx->gcm.H.u[1]);
#endif
- gcm_init_avx(ctx->gcm.Htable, ctx->gcm.H.u);
+ GCM_init(ctx->gcm.Htable, ctx->gcm.H.u);
}
static void
-gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
+gcm_setiv_op(struct ossl_gcm_context *ctx, const unsigned char *iv,
size_t len)
{
uint32_t ctr;
@@ -269,7 +196,7 @@
ctx->gcm.Xi.u[0] = 0;
ctx->gcm.Xi.u[1] = 0;
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
ctr++;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -280,7 +207,7 @@
}
static int
-gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad,
+gcm_aad_op(struct ossl_gcm_context *ctx, const unsigned char *aad,
size_t len)
{
size_t i;
@@ -303,14 +230,14 @@
n = (n + 1) % 16;
}
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
else {
ctx->gcm.ares = n;
return 0;
}
}
if ((i = (len & (size_t)-AES_BLOCK_LEN))) {
- gcm_ghash_avx(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
+ GCM_ghash(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
aad += i;
len -= i;
}
@@ -341,7 +268,7 @@
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -354,7 +281,7 @@
n = mres % 16;
for (i = 0; i < len; ++i) {
if (n == 0) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
&ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -366,7 +293,7 @@
ctx->gcm.Xi.c[n] ^= out[i] = in[i] ^ ctx->gcm.EKi.c[n];
mres = n = (n + 1) % 16;
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
}
ctx->gcm.mres = mres;
@@ -390,7 +317,7 @@
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -408,7 +335,7 @@
n = (n + 1) % 16;
}
if (n == 0) {
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
mres = 0;
} else {
ctx->gcm.mres = n;
@@ -418,7 +345,7 @@
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
- aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
+ AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -430,12 +357,12 @@
while (j--) {
for (i = 0; i < 16; ++i)
ctx->gcm.Xi.c[i] ^= out[i];
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
out += 16;
}
}
if (len) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -453,7 +380,7 @@
}
static int
-gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
+gcm_encrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in,
unsigned char *out, size_t len)
{
size_t bulk = 0, res;
@@ -463,7 +390,7 @@
if ((error = gcm_encrypt(ctx, in, out, res)) != 0)
return error;
- bulk = aesni_gcm_encrypt(in + res, out + res, len - res,
+ bulk = AES_gcm_encrypt(in + res, out + res, len - res,
&ctx->aes_ks, ctx->gcm.Yi.c, ctx->gcm.Xi.u);
ctx->gcm.len.u[1] += bulk;
bulk += res;
@@ -492,7 +419,7 @@
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -506,7 +433,7 @@
for (i = 0; i < len; ++i) {
uint8_t c;
if (n == 0) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
&ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -520,7 +447,7 @@
ctx->gcm.Xi.c[n] ^= c;
mres = n = (n + 1) % 16;
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
}
ctx->gcm.mres = mres;
@@ -544,7 +471,7 @@
if (ctx->gcm.ares) {
/* First call to decrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -564,7 +491,7 @@
n = (n + 1) % 16;
}
if (n == 0) {
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
mres = 0;
} else {
ctx->gcm.mres = n;
@@ -578,12 +505,12 @@
size_t k;
for (k = 0; k < 16; ++k)
ctx->gcm.Xi.c[k] ^= in[k];
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
in += 16;
}
j = i / 16;
in -= i;
- aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
+ AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -595,7 +522,7 @@
len -= i;
}
if (len) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -615,7 +542,7 @@
}
static int
-gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
+gcm_decrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in,
unsigned char *out, size_t len)
{
size_t bulk = 0, res;
@@ -625,8 +552,8 @@
if ((error = gcm_decrypt(ctx, in, out, res)) != 0)
return error;
- bulk = aesni_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
- ctx->gcm.Yi.c, ctx->gcm.Xi.u);
+ bulk = AES_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
+ ctx->gcm.Yi.c, ctx->gcm.Xi.u);
ctx->gcm.len.u[1] += bulk;
bulk += res;
@@ -637,14 +564,14 @@
}
static int
-gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
+gcm_finish_op(struct ossl_gcm_context *ctx, const unsigned char *tag,
size_t len)
{
uint64_t alen = ctx->gcm.len.u[0] << 3;
uint64_t clen = ctx->gcm.len.u[1] << 3;
if (ctx->gcm.mres || ctx->gcm.ares)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
#if BYTE_ORDER == LITTLE_ENDIAN
alen = bswap64(alen);
@@ -653,7 +580,7 @@
ctx->gcm.Xi.u[0] ^= alen;
ctx->gcm.Xi.u[1] ^= clen;
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
@@ -663,40 +590,26 @@
return 0;
}
-static const struct ossl_aes_gcm_ops gcm_ops_aesni = {
- .init = gcm_init_aesni,
- .setiv = gcm_setiv_aesni,
- .aad = gcm_aad_aesni,
- .encrypt = gcm_encrypt_aesni,
- .decrypt = gcm_decrypt_aesni,
- .finish = gcm_finish_aesni,
- .tag = gcm_tag,
+static const struct ossl_aes_gcm_ops gcm_ops = {
+ .init = gcm_init_op,
+ .setiv = gcm_setiv_op,
+ .aad = gcm_aad_op,
+ .encrypt = gcm_encrypt_op,
+ .decrypt = gcm_decrypt_op,
+ .finish = gcm_finish_op,
+ .tag = gcm_tag_op,
};
-int ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, void *_ctx);
-
-int
-ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen,
- void *_ctx)
-{
- struct ossl_gcm_context *ctx;
-
- ctx = _ctx;
- ctx->ops = &gcm_ops_aesni;
- gcm_init(ctx, key, klen);
- return (0);
-}
-
-int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);
+int ossl_aes_gcm_setkey(const unsigned char *key, int klen, void *_ctx);
int
-ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
+ossl_aes_gcm_setkey(const unsigned char *key, int klen,
void *_ctx)
{
struct ossl_gcm_context *ctx;
ctx = _ctx;
- ctx->ops = &gcm_ops_avx512;
+ ctx->ops = &gcm_ops;
gcm_init(ctx, key, klen);
return (0);
}
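
The ppc_aes_gcm_crypt() wrapper added above exists because the POWER8 assembly keeps a 32-bit big-endian block counter in the last word of the IV, so a long request has to be split wherever that counter would wrap. The stand-alone sketch below (illustrative only, not taken from the patch; the actual cipher call is elided) shows the split arithmetic the loop performs.

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * Walk 'nblocks' 16-byte blocks, restarting at each point where a
	 * 32-bit block counter starting at 'ctr32' would wrap, mirroring
	 * the splitting loop in ppc_aes_gcm_crypt().
	 */
	static void
	split_at_ctr_wrap(uint32_t ctr32, size_t nblocks)
	{
		while (nblocks > 0) {
			/* Blocks that fit before the counter wraps to zero. */
			uint64_t unused = (uint64_t)0xffffffffU + 1 - ctr32;
			size_t chunk = nblocks < unused ?
			    nblocks : (size_t)unused;

			/* ... process 'chunk' blocks with counter ctr32 ... */

			nblocks -= chunk;
			ctr32 = 0;	/* counter restarts after the wrap */
		}
	}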
diff --git a/sys/crypto/openssl/ossl_ppc.c b/sys/crypto/openssl/ossl_ppc.c
--- a/sys/crypto/openssl/ossl_ppc.c
+++ b/sys/crypto/openssl/ossl_ppc.c
@@ -38,9 +38,12 @@
ossl_cipher_setkey_t aes_p8_set_encrypt_key;
ossl_cipher_setkey_t aes_p8_set_decrypt_key;
+
ossl_cipher_setkey_t vpaes_set_encrypt_key;
ossl_cipher_setkey_t vpaes_set_decrypt_key;
+ossl_cipher_setkey_t ossl_aes_gcm_setkey;
+
void
ossl_cpuid(struct ossl_softc *sc)
{
@@ -75,7 +78,11 @@
ossl_cipher_aes_cbc.set_encrypt_key = aes_p8_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = aes_p8_set_decrypt_key;
sc->has_aes = true;
- } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
+
+ ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey;
+ ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey;
+ sc->has_aes_gcm = true;
+ } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
ossl_cipher_aes_cbc.set_encrypt_key = vpaes_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = vpaes_set_decrypt_key;
sc->has_aes = true;
diff --git a/sys/crypto/openssl/ossl_x86.c b/sys/crypto/openssl/ossl_x86.c
--- a/sys/crypto/openssl/ossl_x86.c
+++ b/sys/crypto/openssl/ossl_x86.c
@@ -56,7 +56,7 @@
#ifdef __amd64__
int ossl_vaes_vpclmulqdq_capable(void);
-ossl_cipher_setkey_t ossl_aes_gcm_setkey_aesni;
+ossl_cipher_setkey_t ossl_aes_gcm_setkey;
ossl_cipher_setkey_t ossl_aes_gcm_setkey_avx512;
#endif
@@ -141,8 +141,8 @@
} else if ((cpu_feature2 &
(CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) ==
(CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) {
- ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey_aesni;
- ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey_aesni;
+ ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey;
+ ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey;
sc->has_aes_gcm = true;
} else {
sc->has_aes_gcm = false;
diff --git a/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S
new file mode 100644
--- /dev/null
+++ b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S
@@ -0,0 +1,1338 @@
+.machine "any"
+.text
+
+
+
+
+
+.macro .Loop_aes_middle4x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+.endm
+
+
+
+
+
+.macro .Loop_aes_middle8x
+ xxlor 23+32, 1, 1
+ xxlor 24+32, 2, 2
+ xxlor 25+32, 3, 3
+ xxlor 26+32, 4, 4
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 5, 5
+ xxlor 24+32, 6, 6
+ xxlor 25+32, 7, 7
+ xxlor 26+32, 8, 8
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+.endm
+
+
+
+
+ppc_aes_gcm_ghash:
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+ blr
+
+
+
+
+
+.macro ppc_aes_gcm_ghash2_4x
+
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 27, 23, 27
+
+
+ .long 0x1309A4C8
+ .long 0x1326ACC8
+ .long 0x1343B4C8
+ vxor 19, 19, 27
+ .long 0x12EC9CC8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D9CC8
+ .long 0x132AA4C8
+ .long 0x1347ACC8
+ .long 0x1364B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E9CC8
+ .long 0x132BA4C8
+ .long 0x1348ACC8
+ .long 0x1365B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+.endm
+
+
+
+
+.macro ppc_update_hash_1x
+ vxor 28, 28, 0
+
+ vxor 19, 19, 19
+
+ .long 0x12C3E4C8
+ .long 0x12E4E4C8
+ .long 0x1305E4C8
+
+ .long 0x137614C8
+
+ vsldoi 25, 23, 19, 8
+ vsldoi 26, 19, 23, 8
+ vxor 22, 22, 25
+ vxor 24, 24, 26
+
+ vsldoi 22, 22, 22, 8
+ vxor 22, 22, 27
+
+ vsldoi 20, 22, 22, 8
+ .long 0x12D614C8
+ vxor 20, 20, 24
+ vxor 22, 22, 20
+
+ vor 0,22,22
+
+.endm
+
+
+
+
+
+
+
+
+
+
+
+
+
+.global ppc_aes_gcm_encrypt
+.align 5
+ppc_aes_gcm_encrypt:
+_ppc_aes_gcm_encrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_ghash
+ b aes_gcm_out
+
+Do_next_ghash:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block
+
+ vor 30,29,29
+
+.Loop_last_block:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10, 240(6)
+
+ cmpdi 12, 16
+ blt Final_block
+
+.macro .Loop_aes_middle_1x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 9, 9
+ .long 0x11EF9D08
+.endm
+
+Next_rem_block:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x
+
+Do_next_1x:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x
+
+Do_final_1x:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
+
+
+
+
+
+
+
+Write_partial_block:
+ li 10, 192
+ stxvb16x 15+32, 10, 1
+
+
+ addi 10, 9, -1
+ addi 16, 1, 191
+
+ mtctr 12
+ li 15, 0
+
+Write_last_byte:
+ lbzu 14, 1(16)
+ stbu 14, 1(10)
+ bdnz Write_last_byte
+ blr
+
+aes_gcm_out:
+
+ stxvb16x 32, 0, 8
+ add 3, 11, 12
+
+ li 9, 256
+ lvx 20, 9, 1
+ addi 9, 9, 16
+ lvx 21, 9, 1
+ addi 9, 9, 16
+ lvx 22, 9, 1
+ addi 9, 9, 16
+ lvx 23, 9, 1
+ addi 9, 9, 16
+ lvx 24, 9, 1
+ addi 9, 9, 16
+ lvx 25, 9, 1
+ addi 9, 9, 16
+ lvx 26, 9, 1
+ addi 9, 9, 16
+ lvx 27, 9, 1
+ addi 9, 9, 16
+ lvx 28, 9, 1
+ addi 9, 9, 16
+ lvx 29, 9, 1
+ addi 9, 9, 16
+ lvx 30, 9, 1
+ addi 9, 9, 16
+ lvx 31, 9, 1
+
+ ld 0, 528(1)
+ ld 14,112(1)
+ ld 15,120(1)
+ ld 16,128(1)
+ ld 17,136(1)
+ ld 18,144(1)
+ ld 19,152(1)
+ ld 20,160(1)
+ ld 21,168(1)
+
+ mtlr 0
+ addi 1, 1, 512
+ blr
+
+
+
+
+.global ppc_aes_gcm_decrypt
+.align 5
+ppc_aes_gcm_decrypt:
+_ppc_aes_gcm_decrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x_dec
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x_dec:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block_dec
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block_dec:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_last_aes_dec
+ b aes_gcm_out
+
+Do_last_aes_dec:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+ xxlor 15+32, 15, 15
+ xxlor 16+32, 16, 16
+ xxlor 17+32, 17, 17
+ xxlor 18+32, 18, 18
+ xxlor 19+32, 19, 19
+ xxlor 20+32, 20, 20
+ xxlor 21+32, 21, 21
+ xxlor 22+32, 22, 22
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block_dec
+
+ vor 30,29,29
+
+.Loop_last_block_dec:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10,240(6)
+
+ cmpdi 12, 16
+ blt Final_block_dec
+
+Next_rem_block_dec:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x_dec
+
+Do_next_1x_dec:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block_dec
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block_dec:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x_dec
+
+Do_final_1x_dec:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
diff --git a/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S
new file mode 100644
--- /dev/null
+++ b/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S
@@ -0,0 +1,1340 @@
+/* Do not modify. This file is auto-generated from aes-ppc.pl. */
+.machine "any"
+.abiversion 2
+.text
+
+
+
+
+
+.macro .Loop_aes_middle4x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+.endm
+
+
+
+
+
+.macro .Loop_aes_middle8x
+ xxlor 23+32, 1, 1
+ xxlor 24+32, 2, 2
+ xxlor 25+32, 3, 3
+ xxlor 26+32, 4, 4
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 5, 5
+ xxlor 24+32, 6, 6
+ xxlor 25+32, 7, 7
+ xxlor 26+32, 8, 8
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+.endm
+
+
+
+
+ppc_aes_gcm_ghash:
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+ blr
+
+
+
+
+
+.macro ppc_aes_gcm_ghash2_4x
+
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 27, 23, 27
+
+
+ .long 0x1309A4C8
+ .long 0x1326ACC8
+ .long 0x1343B4C8
+ vxor 19, 19, 27
+ .long 0x12EC9CC8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D9CC8
+ .long 0x132AA4C8
+ .long 0x1347ACC8
+ .long 0x1364B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E9CC8
+ .long 0x132BA4C8
+ .long 0x1348ACC8
+ .long 0x1365B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+.endm
+
+
+
+
+.macro ppc_update_hash_1x
+ vxor 28, 28, 0
+
+ vxor 19, 19, 19
+
+ .long 0x12C3E4C8
+ .long 0x12E4E4C8
+ .long 0x1305E4C8
+
+ .long 0x137614C8
+
+ vsldoi 25, 23, 19, 8
+ vsldoi 26, 19, 23, 8
+ vxor 22, 22, 25
+ vxor 24, 24, 26
+
+ vsldoi 22, 22, 22, 8
+ vxor 22, 22, 27
+
+ vsldoi 20, 22, 22, 8
+ .long 0x12D614C8
+ vxor 20, 20, 24
+ vxor 22, 22, 20
+
+ vor 0,22,22
+
+.endm
+
+
+
+
+
+
+
+
+
+
+
+
+
+.global ppc_aes_gcm_encrypt
+.align 5
+ppc_aes_gcm_encrypt:
+_ppc_aes_gcm_encrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_ghash
+ b aes_gcm_out
+
+Do_next_ghash:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block
+
+ vor 30,29,29
+
+.Loop_last_block:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10, 240(6)
+
+ cmpdi 12, 16
+ blt Final_block
+
+.macro .Loop_aes_middle_1x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 9, 9
+ .long 0x11EF9D08
+.endm
+
+Next_rem_block:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x
+
+Do_next_1x:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x
+
+Do_final_1x:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
+
+
+
+
+
+
+
+Write_partial_block:
+ li 10, 192
+ stxvb16x 15+32, 10, 1
+
+
+ addi 10, 9, -1
+ addi 16, 1, 191
+
+ mtctr 12
+ li 15, 0
+
+Write_last_byte:
+ lbzu 14, 1(16)
+ stbu 14, 1(10)
+ bdnz Write_last_byte
+ blr
+
+aes_gcm_out:
+
+ stxvb16x 32, 0, 8
+ add 3, 11, 12
+
+ li 9, 256
+ lvx 20, 9, 1
+ addi 9, 9, 16
+ lvx 21, 9, 1
+ addi 9, 9, 16
+ lvx 22, 9, 1
+ addi 9, 9, 16
+ lvx 23, 9, 1
+ addi 9, 9, 16
+ lvx 24, 9, 1
+ addi 9, 9, 16
+ lvx 25, 9, 1
+ addi 9, 9, 16
+ lvx 26, 9, 1
+ addi 9, 9, 16
+ lvx 27, 9, 1
+ addi 9, 9, 16
+ lvx 28, 9, 1
+ addi 9, 9, 16
+ lvx 29, 9, 1
+ addi 9, 9, 16
+ lvx 30, 9, 1
+ addi 9, 9, 16
+ lvx 31, 9, 1
+
+ ld 0, 528(1)
+ ld 14,112(1)
+ ld 15,120(1)
+ ld 16,128(1)
+ ld 17,136(1)
+ ld 18,144(1)
+ ld 19,152(1)
+ ld 20,160(1)
+ ld 21,168(1)
+
+ mtlr 0
+ addi 1, 1, 512
+ blr
+
+
+
+
+.global ppc_aes_gcm_decrypt
+.align 5
+ppc_aes_gcm_decrypt:
+_ppc_aes_gcm_decrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x_dec
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x_dec:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block_dec
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block_dec:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_last_aes_dec
+ b aes_gcm_out
+
+Do_last_aes_dec:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+ xxlor 15+32, 15, 15
+ xxlor 16+32, 16, 16
+ xxlor 17+32, 17, 17
+ xxlor 18+32, 18, 18
+ xxlor 19+32, 19, 19
+ xxlor 20+32, 20, 20
+ xxlor 21+32, 21, 21
+ xxlor 22+32, 22, 22
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block_dec
+
+ vor 30,29,29
+
+.Loop_last_block_dec:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10,240(6)
+
+ cmpdi 12, 16
+ blt Final_block_dec
+
+Next_rem_block_dec:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x_dec
+
+Do_next_1x_dec:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block_dec
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block_dec:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x_dec
+
+Do_final_1x_dec:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
diff --git a/sys/modules/ossl/Makefile b/sys/modules/ossl/Makefile
--- a/sys/modules/ossl/Makefile
+++ b/sys/modules/ossl/Makefile
@@ -25,7 +25,7 @@
sha256-armv4.S \
sha512-armv4.S \
ossl_arm.c \
- ossl_aes_gcm.c
+ ossl_aes_gcm_neon.c
SRCS.aarch64= \
chacha-armv8.S \
@@ -47,6 +47,7 @@
sha256-x86_64.S \
sha512-x86_64.S \
ossl_aes_gcm.c \
+ ossl_aes_gcm_avx512.c \
ossl_x86.c
SRCS.i386= \
@@ -59,6 +60,8 @@
ossl_x86.c
SRCS.powerpc64le= \
+ aes-gcm-ppc.S \
+ ossl_aes_gcm.c \
ossl_ppccap.c \
aes-ppc.S \
aesp8-ppc.S \
@@ -80,6 +83,8 @@
x25519-ppc64.S
SRCS.powerpc64= \
+ aes-gcm-ppc.S \
+ ossl_aes_gcm.c \
ossl_ppccap.c \
aes-ppc.S \
aesp8-ppc.S \
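
For reference, the calling convention of ppc_aes_gcm_encrypt/ppc_aes_gcm_decrypt can be read off the prologues above: r3 = input pointer, r4 = output pointer, r5 = length in bytes, r6 = expanded AES key schedule (round count loaded from offset 240), r7 = 16-byte counter block, r8 = the GHASH state Xi, and on return r3 holds the number of bytes actually processed (aes_gcm_out computes it as the running offset plus the final partial length). The sketch below is a hedged, hypothetical C-side view inferred from that register usage; the authoritative declarations and dispatch live in the shared crypto/openssl/ossl_aes_gcm.c glue added by this diff, which is not reproduced in this excerpt.

```c
#include <stddef.h>
#include <stdint.h>

/*
 * Hypothetical prototypes inferred from the register usage in the
 * assembly above (r3=in, r4=out, r5=len, r6=key schedule, r7=counter,
 * r8=Xi).  The real declarations belong to the C glue code.
 */
size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out,
    size_t len, const void *keysched, unsigned char counter[16],
    uint64_t *Xi);
size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out,
    size_t len, const void *keysched, unsigned char counter[16],
    uint64_t *Xi);

/*
 * Placeholder context for illustration only; not the structure used by
 * the actual glue code.  The key schedule layout only needs the round
 * count at byte offset 240, matching the lwz 9,240(6) above.
 */
struct hypothetical_gcm_ctx {
	unsigned char	key_schedule[244];
	unsigned char	counter_block[16];
	uint64_t	Xi[2];
};

/*
 * Sketch of a bulk-encryption step: hand the buffer to the assembly,
 * which consumes as many bytes as it can (including a final partial
 * block), and report back how much it processed.
 */
static size_t
gcm_bulk_encrypt_sketch(struct hypothetical_gcm_ctx *ctx,
    const unsigned char *in, unsigned char *out, size_t len)
{
	size_t done;

	done = ppc_aes_gcm_encrypt(in, out, len, ctx->key_schedule,
	    ctx->counter_block, ctx->Xi);
	/* Any remainder (done < len) would fall back to a generic path. */
	return (done);
}
```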
