D44274.diff
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -107,7 +107,8 @@
crypto/openssl/amd64/sha1-x86_64.S optional ossl
crypto/openssl/amd64/sha256-x86_64.S optional ossl
crypto/openssl/amd64/sha512-x86_64.S optional ossl
-crypto/openssl/amd64/ossl_aes_gcm.c optional ossl
+crypto/openssl/amd64/ossl_aes_gcm_avx512.c optional ossl
+crypto/openssl/ossl_aes_gcm.c optional ossl
dev/amdgpio/amdgpio.c optional amdgpio
dev/axgbe/if_axgbe_pci.c optional axp
dev/axgbe/xgbe-desc.c optional axp
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -132,7 +132,7 @@
libkern/umoddi3.c standard
crypto/openssl/ossl_arm.c optional ossl
-crypto/openssl/arm/ossl_aes_gcm.c optional ossl
+crypto/openssl/arm/ossl_aes_gcm_neon.c optional ossl
crypto/openssl/arm/aes-armv4.S optional ossl \
compile-with "${NORMAL_C} -I${SRCTOP}/sys/crypto/openssl"
crypto/openssl/arm/bsaes-armv7.S optional ossl \
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -22,9 +22,11 @@
# openssl ppc common files
crypto/openssl/ossl_ppc.c optional ossl powerpc64 | ossl powerpc64le
+crypto/openssl/ossl_aes_gcm.c optional ossl powerpc64 | ossl powerpc64le
# openssl assembly files (powerpc64le)
crypto/openssl/powerpc64le/aes-ppc.S optional ossl powerpc64le
+crypto/openssl/powerpc64le/aes-gcm-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/aesp8-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/chacha-ppc.S optional ossl powerpc64le
crypto/openssl/powerpc64le/ecp_nistz256-ppc64.S optional ossl powerpc64le
@@ -45,6 +47,7 @@
# openssl assembly files (powerpc64)
crypto/openssl/powerpc64/aes-ppc.S optional ossl powerpc64
+crypto/openssl/powerpc64/aes-gcm-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/aesp8-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/chacha-ppc.S optional ossl powerpc64
crypto/openssl/powerpc64/ecp_nistz256-ppc64.S optional ossl powerpc64
diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c
new file mode 100644
--- /dev/null
+++ b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright (c) 2021, Intel Corporation. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+/*
+ * This file contains an AES-GCM wrapper implementation from OpenSSL, using
+ * VAES extensions. It was ported from cipher_aes_gcm_hw_vaes_avx512.inc.
+ */
+
+#include <sys/endian.h>
+#include <sys/systm.h>
+
+#include <crypto/openssl/ossl.h>
+#include <crypto/openssl/ossl_aes_gcm.h>
+#include <crypto/openssl/ossl_cipher.h>
+
+#include <opencrypto/cryptodev.h>
+
+_Static_assert(
+ sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
+ "ossl_gcm_context too large");
+
+void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
+
+static void
+gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+{
+ KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
+ ("%s: invalid key length %zu", __func__, keylen));
+
+ memset(&ctx->gcm, 0, sizeof(ctx->gcm));
+ memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
+ aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
+ ctx->ops->init(ctx, key, keylen);
+}
+
+static void
+gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
+{
+ (void)ctx->ops->finish(ctx, NULL, 0);
+ memcpy(tag, ctx->gcm.Xi.c, len);
+}
+
+void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
+void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
+void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
+ const unsigned char *iv, size_t ivlen);
+void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
+ size_t len);
+void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
+ unsigned int *pblocklen, const unsigned char *in, size_t len,
+ unsigned char *out);
+void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
+ unsigned int *pblocklen, const unsigned char *in, size_t len,
+ unsigned char *out);
+void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
+
+static void
+gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+{
+ ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
+}
+
+static void
+gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
+ size_t len)
+{
+ KASSERT(len == AES_GCM_IV_LEN,
+ ("%s: invalid IV length %zu", __func__, len));
+
+ ctx->gcm.Yi.u[0] = 0; /* Current counter */
+ ctx->gcm.Yi.u[1] = 0;
+ ctx->gcm.Xi.u[0] = 0; /* AAD hash */
+ ctx->gcm.Xi.u[1] = 0;
+ ctx->gcm.len.u[0] = 0; /* AAD length */
+ ctx->gcm.len.u[1] = 0; /* Message length */
+ ctx->gcm.ares = 0;
+ ctx->gcm.mres = 0;
+
+ ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
+}
+
+static int
+gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
+ size_t len)
+{
+ uint64_t alen = ctx->gcm.len.u[0];
+ size_t lenblks;
+ unsigned int ares;
+
+ /* Bad sequence: call of AAD update after message processing */
+ if (ctx->gcm.len.u[1])
+ return -2;
+
+ alen += len;
+ /* AAD is limited by 2^64 bits, thus 2^61 bytes */
+ if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
+ return -1;
+ ctx->gcm.len.u[0] = alen;
+
+ ares = ctx->gcm.ares;
+ /* Partial AAD block left from previous AAD update calls */
+ if (ares > 0) {
+ /*
+ * Fill partial block buffer till full block
+ * (note, the hash is stored reflected)
+ */
+ while (ares > 0 && len > 0) {
+ ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
+ --len;
+ ares = (ares + 1) % AES_BLOCK_LEN;
+ }
+ /* Full block gathered */
+ if (ares == 0) {
+ ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
+ } else { /* no more AAD */
+ ctx->gcm.ares = ares;
+ return 0;
+ }
+ }
+
+ /* Bulk AAD processing */
+ lenblks = len & ((size_t)(-AES_BLOCK_LEN));
+ if (lenblks > 0) {
+ ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
+ aad += lenblks;
+ len -= lenblks;
+ }
+
+ /* Add remaining AAD to the hash (note, the hash is stored reflected) */
+ if (len > 0) {
+ ares = (unsigned int)len;
+ for (size_t i = 0; i < len; ++i)
+ ctx->gcm.Xi.c[15 - i] ^= aad[i];
+ }
+
+ ctx->gcm.ares = ares;
+
+ return 0;
+}
+
+static int
+_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len, bool encrypt)
+{
+ uint64_t mlen = ctx->gcm.len.u[1];
+
+ mlen += len;
+ if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
+ return -1;
+
+ ctx->gcm.len.u[1] = mlen;
+
+ /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
+ if (ctx->gcm.ares > 0) {
+ ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
+ ctx->gcm.ares = 0;
+ }
+
+ if (encrypt) {
+ ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
+ in, len, out);
+ } else {
+ ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
+ in, len, out);
+ }
+
+ return 0;
+}
+
+static int
+gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len)
+{
+ return _gcm_encrypt_avx512(ctx, in, out, len, true);
+}
+
+static int
+gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
+ unsigned char *out, size_t len)
+{
+ return _gcm_encrypt_avx512(ctx, in, out, len, false);
+}
+
+static int
+gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
+ size_t len)
+{
+ unsigned int *res = &ctx->gcm.mres;
+
+ /* Finalize AAD processing */
+ if (ctx->gcm.ares > 0)
+ res = &ctx->gcm.ares;
+
+ ossl_aes_gcm_finalize_avx512(ctx, *res);
+
+ ctx->gcm.ares = ctx->gcm.mres = 0;
+
+ if (tag != NULL)
+ return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
+ return 0;
+}
+
+static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
+ .init = gcm_init_avx512,
+ .setiv = gcm_setiv_avx512,
+ .aad = gcm_aad_avx512,
+ .encrypt = gcm_encrypt_avx512,
+ .decrypt = gcm_decrypt_avx512,
+ .finish = gcm_finish_avx512,
+ .tag = gcm_tag,
+};
+
+int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);
+
+int
+ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
+ void *_ctx)
+{
+ struct ossl_gcm_context *ctx;
+
+ ctx = _ctx;
+ ctx->ops = &gcm_ops_avx512;
+ gcm_init(ctx, key, klen);
+ return (0);
+}
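
Note: the new glue file above never exposes the AVX-512 routines directly to consumers; ossl_aes_gcm_setkey_avx512() stores a pointer to gcm_ops_avx512 in the context, and every later operation dispatches through ctx->ops. The stand-alone C sketch below models that ops-table pattern with hypothetical toy_* names — it only illustrates the dispatch style and is not code from this revision.

/*
 * Minimal user-space sketch of the ops-table dispatch used by the
 * wrapper above.  All names (toy_gcm_ops, toy_gcm_ctx, toy_*) are
 * hypothetical stand-ins for the kernel structures in ossl_aes_gcm.h.
 */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct toy_gcm_ctx;

struct toy_gcm_ops {
	void (*setiv)(struct toy_gcm_ctx *, const unsigned char *, size_t);
	int  (*encrypt)(struct toy_gcm_ctx *, const unsigned char *,
	    unsigned char *, size_t);
	void (*tag)(struct toy_gcm_ctx *, unsigned char *, size_t);
};

struct toy_gcm_ctx {
	const struct toy_gcm_ops *ops;	/* backend chosen at setkey time */
	unsigned char iv[12];
};

/* Backend stubs standing in for the AVX-512 (or AES-NI) routines. */
static void
toy_setiv(struct toy_gcm_ctx *ctx, const unsigned char *iv, size_t len)
{
	memcpy(ctx->iv, iv, len < sizeof(ctx->iv) ? len : sizeof(ctx->iv));
}

static int
toy_encrypt(struct toy_gcm_ctx *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	(void)ctx;
	memcpy(out, in, len);	/* placeholder for the real cipher */
	return (0);
}

static void
toy_tag(struct toy_gcm_ctx *ctx, unsigned char *tag, size_t len)
{
	(void)ctx;
	memset(tag, 0, len);	/* placeholder for GHASH finalization */
}

static const struct toy_gcm_ops toy_ops = {
	.setiv = toy_setiv,
	.encrypt = toy_encrypt,
	.tag = toy_tag,
};

int
main(void)
{
	struct toy_gcm_ctx ctx = { .ops = &toy_ops };
	unsigned char iv[12] = {0}, in[16] = "hello", out[16], tag[16];

	/* Callers only ever go through ctx.ops, as in ossl_aes_gcm_avx512.c. */
	ctx.ops->setiv(&ctx, iv, sizeof(iv));
	ctx.ops->encrypt(&ctx, in, out, sizeof(in));
	ctx.ops->tag(&ctx, tag, sizeof(tag));
	printf("done\n");
	return (0);
}
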
diff --git a/sys/crypto/openssl/arm/ossl_aes_gcm.c b/sys/crypto/openssl/arm/ossl_aes_gcm_neon.c
rename from sys/crypto/openssl/arm/ossl_aes_gcm.c
rename to sys/crypto/openssl/arm/ossl_aes_gcm_neon.c
diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm.c b/sys/crypto/openssl/ossl_aes_gcm.c
rename from sys/crypto/openssl/amd64/ossl_aes_gcm.c
rename to sys/crypto/openssl/ossl_aes_gcm.c
--- a/sys/crypto/openssl/amd64/ossl_aes_gcm.c
+++ b/sys/crypto/openssl/ossl_aes_gcm.c
@@ -1,6 +1,7 @@
/*
* Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2021, Intel Corporation. All Rights Reserved.
+ * Copyright (c) 2023, Raptor Engineering, LLC. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
@@ -9,11 +10,10 @@
*/
/*
- * This file contains 2 AES-GCM wrapper implementations from OpenSSL, using
- * AES-NI and VAES extensions respectively. These were ported from
- * cipher_aes_gcm_hw_aesni.inc and cipher_aes_gcm_hw_vaes_avx512.inc. The
- * AES-NI implementation makes use of a generic C implementation for partial
- * blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
+ * This file contains an AES-GCM wrapper implementation from OpenSSL, using
+ * AES-NI (x86) or POWER8 Crypto Extensions (ppc). It was ported from
+ * cipher_aes_gcm_hw_aesni.inc and it makes use of a generic C implementation
+ * for partial blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
*/
#include <sys/endian.h>
@@ -29,225 +29,152 @@
sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
"ossl_gcm_context too large");
-void aesni_set_encrypt_key(const void *key, int bits, void *ctx);
-
-static void
-gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
-{
- KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
- ("%s: invalid key length %zu", __func__, keylen));
-
- memset(&ctx->gcm, 0, sizeof(ctx->gcm));
- memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
- aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
- ctx->ops->init(ctx, key, keylen);
-}
-
-static void
-gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
-{
- (void)ctx->ops->finish(ctx, NULL, 0);
- memcpy(tag, ctx->gcm.Xi.c, len);
-}
+#if defined(__amd64__) || defined(__i386__)
+#define AES_set_encrypt_key aesni_set_encrypt_key
+#define AES_gcm_encrypt aesni_gcm_encrypt
+#define AES_gcm_decrypt aesni_gcm_decrypt
+#define AES_encrypt aesni_encrypt
+#define AES_ctr32_encrypt_blocks aesni_ctr32_encrypt_blocks
+#define GCM_init gcm_init_avx
+#define GCM_gmult gcm_gmult_avx
+#define GCM_ghash gcm_ghash_avx
+
+void AES_set_encrypt_key(const void *key, int bits, void *ctx);
+size_t AES_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+size_t AES_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks);
+void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, void *ks, const unsigned char *iv);
-void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
-void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
-void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
- const unsigned char *iv, size_t ivlen);
-void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
+void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]);
+void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]);
+void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
size_t len);
-void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
- unsigned int *pblocklen, const unsigned char *in, size_t len,
- unsigned char *out);
-void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
- unsigned int *pblocklen, const unsigned char *in, size_t len,
- unsigned char *out);
-void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);
-
-static void
-gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
-{
- ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
-}
-static void
-gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
- size_t len)
-{
- KASSERT(len == AES_GCM_IV_LEN,
- ("%s: invalid IV length %zu", __func__, len));
+#elif defined(__powerpc64__)
+#define AES_set_encrypt_key aes_p8_set_encrypt_key
+#define AES_gcm_encrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,1)
+#define AES_gcm_decrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,0)
+#define AES_encrypt aes_p8_encrypt
+#define AES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
+#define GCM_init gcm_init_p8
+#define GCM_gmult gcm_gmult_p8
+#define GCM_ghash gcm_ghash_p8
+
+size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
+size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
+ const void *key, unsigned char ivec[16], uint64_t *Xi);
- ctx->gcm.Yi.u[0] = 0; /* Current counter */
- ctx->gcm.Yi.u[1] = 0;
- ctx->gcm.Xi.u[0] = 0; /* AAD hash */
- ctx->gcm.Xi.u[1] = 0;
- ctx->gcm.len.u[0] = 0; /* AAD length */
- ctx->gcm.len.u[1] = 0; /* Message length */
- ctx->gcm.ares = 0;
- ctx->gcm.mres = 0;
+void AES_set_encrypt_key(const void *key, int bits, void *ctx);
+void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks);
+void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+ size_t blocks, void *ks, const unsigned char *iv);
- ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
-}
+void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]);
+void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]);
+void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
+ size_t len);
-static int
-gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
- size_t len)
+static size_t
+ppc_aes_gcm_crypt(const unsigned char *in, unsigned char *out,
+ size_t len, const void *key, unsigned char ivec_[16], uint64_t *Xi,
+ int encrypt)
{
- uint64_t alen = ctx->gcm.len.u[0];
- size_t lenblks;
- unsigned int ares;
-
- /* Bad sequence: call of AAD update after message processing */
- if (ctx->gcm.len.u[1])
- return -2;
-
- alen += len;
- /* AAD is limited by 2^64 bits, thus 2^61 bytes */
- if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
- return -1;
- ctx->gcm.len.u[0] = alen;
+ union {
+ uint32_t d[4];
+ uint8_t c[16];
+ } *ivec = (void *)ivec_;
+ int s = 0;
+ int ndone = 0;
+ int ctr_reset = 0;
+ uint32_t ivec_val;
+ uint64_t blocks_unused;
+ uint64_t nb = len / 16;
+ uint64_t next_ctr = 0;
+ unsigned char ctr_saved[12];
+
+ memcpy(ctr_saved, ivec, 12);
+
+ while (nb) {
+ ivec_val = ivec->d[3];
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ivec_val = bswap32(ivec_val);
+#endif
- ares = ctx->gcm.ares;
- /* Partial AAD block left from previous AAD update calls */
- if (ares > 0) {
- /*
- * Fill partial block buffer till full block
- * (note, the hash is stored reflected)
- */
- while (ares > 0 && len > 0) {
- ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
- --len;
- ares = (ares + 1) % AES_BLOCK_LEN;
- }
- /* Full block gathered */
- if (ares == 0) {
- ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
- } else { /* no more AAD */
- ctx->gcm.ares = ares;
- return 0;
+ blocks_unused = (uint64_t) 0xffffffffU + 1 - (uint64_t)ivec_val;
+ if (nb > blocks_unused) {
+ len = blocks_unused * 16;
+ nb -= blocks_unused;
+ next_ctr = blocks_unused;
+ ctr_reset = 1;
+ } else {
+ len = nb * 16;
+ next_ctr = nb;
+ nb = 0;
}
- }
- /* Bulk AAD processing */
- lenblks = len & ((size_t)(-AES_BLOCK_LEN));
- if (lenblks > 0) {
- ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
- aad += lenblks;
- len -= lenblks;
- }
+ s = encrypt ? ppc_aes_gcm_encrypt(in, out, len, key, ivec->c, Xi) :
+ ppc_aes_gcm_decrypt(in, out, len, key, ivec->c, Xi);
- /* Add remaining AAD to the hash (note, the hash is stored reflected) */
- if (len > 0) {
- ares = (unsigned int)len;
- for (size_t i = 0; i < len; ++i)
- ctx->gcm.Xi.c[15 - i] ^= aad[i];
+ /* add counter to ivec */
+#if BYTE_ORDER == LITTLE_ENDIAN
+ ivec->d[3] = bswap32(ivec_val + next_ctr);
+#else
+ ivec->d[3] += next_ctr;
+#endif
+ if (ctr_reset) {
+ ctr_reset = 0;
+ in += len;
+ out += len;
+ }
+ memcpy(ivec, ctr_saved, 12);
+ ndone += s;
}
- ctx->gcm.ares = ares;
-
- return 0;
+ return ndone;
}
-static int
-_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len, bool encrypt)
-{
- uint64_t mlen = ctx->gcm.len.u[1];
-
- mlen += len;
- if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
- return -1;
-
- ctx->gcm.len.u[1] = mlen;
-
- /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
- if (ctx->gcm.ares > 0) {
- ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
- ctx->gcm.ares = 0;
- }
-
- if (encrypt) {
- ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
- in, len, out);
- } else {
- ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
- in, len, out);
- }
-
- return 0;
-}
+#else
+#error "Unsupported architecture!"
+#endif
-static int
-gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len)
+static void
+gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
- return _gcm_encrypt_avx512(ctx, in, out, len, true);
-}
+ KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
+ ("%s: invalid key length %zu", __func__, keylen));
-static int
-gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
- unsigned char *out, size_t len)
-{
- return _gcm_encrypt_avx512(ctx, in, out, len, false);
+ memset(&ctx->gcm, 0, sizeof(ctx->gcm));
+ memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
+ AES_set_encrypt_key(key, keylen, &ctx->aes_ks);
+ ctx->ops->init(ctx, key, keylen);
}
-static int
-gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
- size_t len)
+static void
+gcm_tag_op(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
{
- unsigned int *res = &ctx->gcm.mres;
-
- /* Finalize AAD processing */
- if (ctx->gcm.ares > 0)
- res = &ctx->gcm.ares;
-
- ossl_aes_gcm_finalize_avx512(ctx, *res);
-
- ctx->gcm.ares = ctx->gcm.mres = 0;
-
- if (tag != NULL)
- return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
- return 0;
+ (void)ctx->ops->finish(ctx, NULL, 0);
+ memcpy(tag, ctx->gcm.Xi.c, len);
}
-static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
- .init = gcm_init_avx512,
- .setiv = gcm_setiv_avx512,
- .aad = gcm_aad_avx512,
- .encrypt = gcm_encrypt_avx512,
- .decrypt = gcm_decrypt_avx512,
- .finish = gcm_finish_avx512,
- .tag = gcm_tag,
-};
-
-size_t aesni_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16], uint64_t *Xi);
-size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
- const void *key, unsigned char ivec[16], uint64_t *Xi);
-void aesni_encrypt(const unsigned char *in, unsigned char *out, void *ks);
-void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
- size_t blocks, void *ks, const unsigned char *iv);
-
-void gcm_init_avx(__uint128_t Htable[16], uint64_t Xi[2]);
-void gcm_gmult_avx(uint64_t Xi[2], const __uint128_t Htable[16]);
-void gcm_ghash_avx(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
- size_t len);
-
static void
-gcm_init_aesni(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
+gcm_init_op(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
- aesni_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.H.u[0] = bswap64(ctx->gcm.H.u[0]);
ctx->gcm.H.u[1] = bswap64(ctx->gcm.H.u[1]);
#endif
- gcm_init_avx(ctx->gcm.Htable, ctx->gcm.H.u);
+ GCM_init(ctx->gcm.Htable, ctx->gcm.H.u);
}
static void
-gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
+gcm_setiv_op(struct ossl_gcm_context *ctx, const unsigned char *iv,
size_t len)
{
uint32_t ctr;
@@ -269,7 +196,7 @@
ctx->gcm.Xi.u[0] = 0;
ctx->gcm.Xi.u[1] = 0;
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
ctr++;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -280,7 +207,7 @@
}
static int
-gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad,
+gcm_aad_op(struct ossl_gcm_context *ctx, const unsigned char *aad,
size_t len)
{
size_t i;
@@ -303,14 +230,14 @@
n = (n + 1) % 16;
}
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
else {
ctx->gcm.ares = n;
return 0;
}
}
if ((i = (len & (size_t)-AES_BLOCK_LEN))) {
- gcm_ghash_avx(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
+ GCM_ghash(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
aad += i;
len -= i;
}
@@ -341,7 +268,7 @@
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -354,7 +281,7 @@
n = mres % 16;
for (i = 0; i < len; ++i) {
if (n == 0) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
&ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -366,7 +293,7 @@
ctx->gcm.Xi.c[n] ^= out[i] = in[i] ^ ctx->gcm.EKi.c[n];
mres = n = (n + 1) % 16;
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
}
ctx->gcm.mres = mres;
@@ -390,7 +317,7 @@
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -408,7 +335,7 @@
n = (n + 1) % 16;
}
if (n == 0) {
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
mres = 0;
} else {
ctx->gcm.mres = n;
@@ -418,7 +345,7 @@
if ((i = (len & (size_t)-16))) {
size_t j = i / 16;
- aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
+ AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -430,12 +357,12 @@
while (j--) {
for (i = 0; i < 16; ++i)
ctx->gcm.Xi.c[i] ^= out[i];
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
out += 16;
}
}
if (len) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -453,7 +380,7 @@
}
static int
-gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
+gcm_encrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in,
unsigned char *out, size_t len)
{
size_t bulk = 0, res;
@@ -463,7 +390,7 @@
if ((error = gcm_encrypt(ctx, in, out, res)) != 0)
return error;
- bulk = aesni_gcm_encrypt(in + res, out + res, len - res,
+ bulk = AES_gcm_encrypt(in + res, out + res, len - res,
&ctx->aes_ks, ctx->gcm.Yi.c, ctx->gcm.Xi.u);
ctx->gcm.len.u[1] += bulk;
bulk += res;
@@ -492,7 +419,7 @@
if (ctx->gcm.ares) {
/* First call to encrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -506,7 +433,7 @@
for (i = 0; i < len; ++i) {
uint8_t c;
if (n == 0) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
&ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
@@ -520,7 +447,7 @@
ctx->gcm.Xi.c[n] ^= c;
mres = n = (n + 1) % 16;
if (n == 0)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
}
ctx->gcm.mres = mres;
@@ -544,7 +471,7 @@
if (ctx->gcm.ares) {
/* First call to decrypt finalizes GHASH(AAD) */
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.ares = 0;
}
@@ -564,7 +491,7 @@
n = (n + 1) % 16;
}
if (n == 0) {
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
mres = 0;
} else {
ctx->gcm.mres = n;
@@ -578,12 +505,12 @@
size_t k;
for (k = 0; k < 16; ++k)
ctx->gcm.Xi.c[k] ^= in[k];
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
in += 16;
}
j = i / 16;
in -= i;
- aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
+ AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -595,7 +522,7 @@
len -= i;
}
if (len) {
- aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
+ AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
ctx->gcm.Yi.d[3] = bswap32(ctr);
@@ -615,7 +542,7 @@
}
static int
-gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
+gcm_decrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in,
unsigned char *out, size_t len)
{
size_t bulk = 0, res;
@@ -625,8 +552,8 @@
if ((error = gcm_decrypt(ctx, in, out, res)) != 0)
return error;
- bulk = aesni_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
- ctx->gcm.Yi.c, ctx->gcm.Xi.u);
+ bulk = AES_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
+ ctx->gcm.Yi.c, ctx->gcm.Xi.u);
ctx->gcm.len.u[1] += bulk;
bulk += res;
@@ -637,14 +564,14 @@
}
static int
-gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
+gcm_finish_op(struct ossl_gcm_context *ctx, const unsigned char *tag,
size_t len)
{
uint64_t alen = ctx->gcm.len.u[0] << 3;
uint64_t clen = ctx->gcm.len.u[1] << 3;
if (ctx->gcm.mres || ctx->gcm.ares)
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
#if BYTE_ORDER == LITTLE_ENDIAN
alen = bswap64(alen);
@@ -653,7 +580,7 @@
ctx->gcm.Xi.u[0] ^= alen;
ctx->gcm.Xi.u[1] ^= clen;
- gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
+ GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable);
ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];
@@ -663,40 +590,26 @@
return 0;
}
-static const struct ossl_aes_gcm_ops gcm_ops_aesni = {
- .init = gcm_init_aesni,
- .setiv = gcm_setiv_aesni,
- .aad = gcm_aad_aesni,
- .encrypt = gcm_encrypt_aesni,
- .decrypt = gcm_decrypt_aesni,
- .finish = gcm_finish_aesni,
- .tag = gcm_tag,
+static const struct ossl_aes_gcm_ops gcm_ops = {
+ .init = gcm_init_op,
+ .setiv = gcm_setiv_op,
+ .aad = gcm_aad_op,
+ .encrypt = gcm_encrypt_op,
+ .decrypt = gcm_decrypt_op,
+ .finish = gcm_finish_op,
+ .tag = gcm_tag_op,
};
-int ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, void *_ctx);
-
-int
-ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen,
- void *_ctx)
-{
- struct ossl_gcm_context *ctx;
-
- ctx = _ctx;
- ctx->ops = &gcm_ops_aesni;
- gcm_init(ctx, key, klen);
- return (0);
-}
-
-int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);
+int ossl_aes_gcm_setkey(const unsigned char *key, int klen, void *_ctx);
int
-ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
+ossl_aes_gcm_setkey(const unsigned char *key, int klen,
void *_ctx)
{
struct ossl_gcm_context *ctx;
ctx = _ctx;
- ctx->ops = &gcm_ops_avx512;
+ ctx->ops = &gcm_ops;
gcm_init(ctx, key, klen);
return (0);
}
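
Note: the ppc_aes_gcm_crypt() wrapper added above splits each request because the POWER8 assembly only advances the low 32 bits of the counter block, so bulk processing must stop before that counter wraps. The stand-alone sketch below demonstrates the same chunking arithmetic with a hypothetical helper, split_at_ctr_wrap(); it is an illustration only, not part of the patch.

/*
 * Stand-alone sketch of the counter-split arithmetic used by
 * ppc_aes_gcm_crypt().  split_at_ctr_wrap() is a hypothetical name.
 */
#include <stdint.h>
#include <stdio.h>

/*
 * Given the current 32-bit block counter and the number of 16-byte
 * blocks remaining, return how many blocks can be processed before the
 * counter would wrap back to zero.
 */
static uint64_t
split_at_ctr_wrap(uint32_t ctr, uint64_t blocks_left)
{
	uint64_t blocks_unused = (uint64_t)0xffffffffU + 1 - (uint64_t)ctr;

	return (blocks_left > blocks_unused ? blocks_unused : blocks_left);
}

int
main(void)
{
	uint32_t ctr = 0xfffffffeU;	/* two blocks away from wrapping */
	uint64_t left = 10;

	while (left > 0) {
		uint64_t chunk = split_at_ctr_wrap(ctr, left);

		printf("process %ju blocks at ctr %#x\n",
		    (uintmax_t)chunk, ctr);
		ctr += (uint32_t)chunk;	/* wraps to 0 after the first chunk */
		left -= chunk;
	}
	return (0);
}
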
diff --git a/sys/crypto/openssl/ossl_ppc.c b/sys/crypto/openssl/ossl_ppc.c
--- a/sys/crypto/openssl/ossl_ppc.c
+++ b/sys/crypto/openssl/ossl_ppc.c
@@ -38,9 +38,12 @@
ossl_cipher_setkey_t aes_p8_set_encrypt_key;
ossl_cipher_setkey_t aes_p8_set_decrypt_key;
+
ossl_cipher_setkey_t vpaes_set_encrypt_key;
ossl_cipher_setkey_t vpaes_set_decrypt_key;
+ossl_cipher_setkey_t ossl_aes_gcm_setkey;
+
void
ossl_cpuid(struct ossl_softc *sc)
{
@@ -75,7 +78,11 @@
ossl_cipher_aes_cbc.set_encrypt_key = aes_p8_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = aes_p8_set_decrypt_key;
sc->has_aes = true;
- } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
+
+ ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey;
+ ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey;
+ sc->has_aes_gcm = true;
+ } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) {
ossl_cipher_aes_cbc.set_encrypt_key = vpaes_set_encrypt_key;
ossl_cipher_aes_cbc.set_decrypt_key = vpaes_set_decrypt_key;
sc->has_aes = true;
diff --git a/sys/crypto/openssl/ossl_x86.c b/sys/crypto/openssl/ossl_x86.c
--- a/sys/crypto/openssl/ossl_x86.c
+++ b/sys/crypto/openssl/ossl_x86.c
@@ -56,7 +56,7 @@
#ifdef __amd64__
int ossl_vaes_vpclmulqdq_capable(void);
-ossl_cipher_setkey_t ossl_aes_gcm_setkey_aesni;
+ossl_cipher_setkey_t ossl_aes_gcm_setkey;
ossl_cipher_setkey_t ossl_aes_gcm_setkey_avx512;
#endif
@@ -141,8 +141,8 @@
} else if ((cpu_feature2 &
(CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) ==
(CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) {
- ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey_aesni;
- ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey_aesni;
+ ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey;
+ ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey;
sc->has_aes_gcm = true;
} else {
sc->has_aes_gcm = false;
diff --git a/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S
new file mode 100644
--- /dev/null
+++ b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S
@@ -0,0 +1,1338 @@
+.machine "any"
+.text
+
+
+
+
+
+.macro .Loop_aes_middle4x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+.endm
+
+
+
+
+
+.macro .Loop_aes_middle8x
+ xxlor 23+32, 1, 1
+ xxlor 24+32, 2, 2
+ xxlor 25+32, 3, 3
+ xxlor 26+32, 4, 4
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 5, 5
+ xxlor 24+32, 6, 6
+ xxlor 25+32, 7, 7
+ xxlor 26+32, 8, 8
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+.endm
+
+
+
+
+ppc_aes_gcm_ghash:
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+ blr
+
+
+
+
+
+.macro ppc_aes_gcm_ghash2_4x
+
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 27, 23, 27
+
+
+ .long 0x1309A4C8
+ .long 0x1326ACC8
+ .long 0x1343B4C8
+ vxor 19, 19, 27
+ .long 0x12EC9CC8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D9CC8
+ .long 0x132AA4C8
+ .long 0x1347ACC8
+ .long 0x1364B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E9CC8
+ .long 0x132BA4C8
+ .long 0x1348ACC8
+ .long 0x1365B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+.endm
+
+
+
+
+.macro ppc_update_hash_1x
+ vxor 28, 28, 0
+
+ vxor 19, 19, 19
+
+ .long 0x12C3E4C8
+ .long 0x12E4E4C8
+ .long 0x1305E4C8
+
+ .long 0x137614C8
+
+ vsldoi 25, 23, 19, 8
+ vsldoi 26, 19, 23, 8
+ vxor 22, 22, 25
+ vxor 24, 24, 26
+
+ vsldoi 22, 22, 22, 8
+ vxor 22, 22, 27
+
+ vsldoi 20, 22, 22, 8
+ .long 0x12D614C8
+ vxor 20, 20, 24
+ vxor 22, 22, 20
+
+ vor 0,22,22
+
+.endm
+
+
+
+
+
+
+
+
+
+
+
+
+
+.global ppc_aes_gcm_encrypt
+.align 5
+ppc_aes_gcm_encrypt:
+_ppc_aes_gcm_encrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_ghash
+ b aes_gcm_out
+
+Do_next_ghash:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block
+
+ vor 30,29,29
+
+.Loop_last_block:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10, 240(6)
+
+ cmpdi 12, 16
+ blt Final_block
+
+.macro .Loop_aes_middle_1x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 9, 9
+ .long 0x11EF9D08
+.endm
+
+Next_rem_block:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x
+
+Do_next_1x:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x
+
+Do_final_1x:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
+
+
+
+
+
+
+
+Write_partial_block:
+ li 10, 192
+ stxvb16x 15+32, 10, 1
+
+
+ addi 10, 9, -1
+ addi 16, 1, 191
+
+ mtctr 12
+ li 15, 0
+
+Write_last_byte:
+ lbzu 14, 1(16)
+ stbu 14, 1(10)
+ bdnz Write_last_byte
+ blr
+
+aes_gcm_out:
+
+ stxvb16x 32, 0, 8
+ add 3, 11, 12
+
+ li 9, 256
+ lvx 20, 9, 1
+ addi 9, 9, 16
+ lvx 21, 9, 1
+ addi 9, 9, 16
+ lvx 22, 9, 1
+ addi 9, 9, 16
+ lvx 23, 9, 1
+ addi 9, 9, 16
+ lvx 24, 9, 1
+ addi 9, 9, 16
+ lvx 25, 9, 1
+ addi 9, 9, 16
+ lvx 26, 9, 1
+ addi 9, 9, 16
+ lvx 27, 9, 1
+ addi 9, 9, 16
+ lvx 28, 9, 1
+ addi 9, 9, 16
+ lvx 29, 9, 1
+ addi 9, 9, 16
+ lvx 30, 9, 1
+ addi 9, 9, 16
+ lvx 31, 9, 1
+
+ ld 0, 528(1)
+ ld 14,112(1)
+ ld 15,120(1)
+ ld 16,128(1)
+ ld 17,136(1)
+ ld 18,144(1)
+ ld 19,152(1)
+ ld 20,160(1)
+ ld 21,168(1)
+
+ mtlr 0
+ addi 1, 1, 512
+ blr
+
+
+
+
+.global ppc_aes_gcm_decrypt
+.align 5
+ppc_aes_gcm_decrypt:
+_ppc_aes_gcm_decrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x_dec
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x_dec:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block_dec
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block_dec:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_last_aes_dec
+ b aes_gcm_out
+
+Do_last_aes_dec:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+ xxlor 15+32, 15, 15
+ xxlor 16+32, 16, 16
+ xxlor 17+32, 17, 17
+ xxlor 18+32, 18, 18
+ xxlor 19+32, 19, 19
+ xxlor 20+32, 20, 20
+ xxlor 21+32, 21, 21
+ xxlor 22+32, 22, 22
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block_dec
+
+ vor 30,29,29
+
+.Loop_last_block_dec:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10,240(6)
+
+ cmpdi 12, 16
+ blt Final_block_dec
+
+Next_rem_block_dec:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x_dec
+
+Do_next_1x_dec:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block_dec
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block_dec:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x_dec
+
+Do_final_1x_dec:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
diff --git a/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S
new file mode 100644
--- /dev/null
+++ b/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S
@@ -0,0 +1,1340 @@
+/* Do not modify. This file is auto-generated from aes-ppc.pl. */
+.machine "any"
+.abiversion 2
+.text
+
+
+
+
+
+.macro .Loop_aes_middle4x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x12109D08
+ .long 0x12319D08
+ .long 0x12529D08
+
+ .long 0x11EFA508
+ .long 0x1210A508
+ .long 0x1231A508
+ .long 0x1252A508
+
+ .long 0x11EFAD08
+ .long 0x1210AD08
+ .long 0x1231AD08
+ .long 0x1252AD08
+
+ .long 0x11EFB508
+ .long 0x1210B508
+ .long 0x1231B508
+ .long 0x1252B508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+.endm
+
+
+
+
+
+.macro .Loop_aes_middle8x
+ xxlor 23+32, 1, 1
+ xxlor 24+32, 2, 2
+ xxlor 25+32, 3, 3
+ xxlor 26+32, 4, 4
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 5, 5
+ xxlor 24+32, 6, 6
+ xxlor 25+32, 7, 7
+ xxlor 26+32, 8, 8
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ .long 0x11EFCD08
+ .long 0x1210CD08
+ .long 0x1231CD08
+ .long 0x1252CD08
+ .long 0x1273CD08
+ .long 0x1294CD08
+ .long 0x12B5CD08
+ .long 0x12D6CD08
+
+ .long 0x11EFD508
+ .long 0x1210D508
+ .long 0x1231D508
+ .long 0x1252D508
+ .long 0x1273D508
+ .long 0x1294D508
+ .long 0x12B5D508
+ .long 0x12D6D508
+
+ xxlor 23+32, 9, 9
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+.endm
+
+
+
+
+ppc_aes_gcm_ghash:
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+ blr
+
+
+
+
+
+.macro ppc_aes_gcm_ghash2_4x
+
+ vxor 15, 15, 0
+
+ xxlxor 29, 29, 29
+
+ .long 0x12EC7CC8
+ .long 0x130984C8
+ .long 0x13268CC8
+ .long 0x134394C8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D7CC8
+ .long 0x132A84C8
+ .long 0x13478CC8
+ .long 0x136494C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E7CC8
+ .long 0x132B84C8
+ .long 0x13488CC8
+ .long 0x136594C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 27, 23, 27
+
+
+ .long 0x1309A4C8
+ .long 0x1326ACC8
+ .long 0x1343B4C8
+ vxor 19, 19, 27
+ .long 0x12EC9CC8
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26
+
+ .long 0x130D9CC8
+ .long 0x132AA4C8
+ .long 0x1347ACC8
+ .long 0x1364B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+
+
+ .long 0x139714C8
+
+ xxlor 29+32, 29, 29
+
+ vxor 24, 24, 27
+ vsldoi 26, 24, 29, 8
+ vsldoi 29, 29, 24, 8
+ vxor 23, 23, 26
+
+ vsldoi 23, 23, 23, 8
+ vxor 23, 23, 28
+
+ .long 0x130E9CC8
+ .long 0x132BA4C8
+ .long 0x1348ACC8
+ .long 0x1365B4C8
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26
+ vxor 24, 24, 27
+
+ vxor 24, 24, 29
+
+
+ vsldoi 27, 23, 23, 8
+ .long 0x12F714C8
+ vxor 27, 27, 24
+ vxor 23, 23, 27
+
+ xxlor 32, 23+32, 23+32
+
+.endm
+
+
+
+
+.macro ppc_update_hash_1x
+ vxor 28, 28, 0
+
+ vxor 19, 19, 19
+
+ .long 0x12C3E4C8
+ .long 0x12E4E4C8
+ .long 0x1305E4C8
+
+ .long 0x137614C8
+
+ vsldoi 25, 23, 19, 8
+ vsldoi 26, 19, 23, 8
+ vxor 22, 22, 25
+ vxor 24, 24, 26
+
+ vsldoi 22, 22, 22, 8
+ vxor 22, 22, 27
+
+ vsldoi 20, 22, 22, 8
+ .long 0x12D614C8
+ vxor 20, 20, 24
+ vxor 22, 22, 20
+
+ vor 0,22,22
+
+.endm
+
+
+
+
+
+
+
+
+
+
+
+
+
+.global ppc_aes_gcm_encrypt
+.align 5
+ppc_aes_gcm_encrypt:
+_ppc_aes_gcm_encrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_ghash
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_ghash
+ b aes_gcm_out
+
+Do_next_ghash:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block
+
+ vor 30,29,29
+
+.Loop_last_block:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10, 240(6)
+
+ cmpdi 12, 16
+ blt Final_block
+
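+# .Loop_aes_middle_1x: AES rounds 1-9 (vcipher) on the single block in
+# v15, using the round keys staged from vs1-vs9.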
+.macro .Loop_aes_middle_1x
+ xxlor 19+32, 1, 1
+ xxlor 20+32, 2, 2
+ xxlor 21+32, 3, 3
+ xxlor 22+32, 4, 4
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 5, 5
+ xxlor 20+32, 6, 6
+ xxlor 21+32, 7, 7
+ xxlor 22+32, 8, 8
+
+ .long 0x11EF9D08
+ .long 0x11EFA508
+ .long 0x11EFAD08
+ .long 0x11EFB508
+
+ xxlor 19+32, 9, 9
+ .long 0x11EF9D08
+.endm
+
+Next_rem_block:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x
+
+Do_next_1x:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x
+
+Do_final_1x:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ vor 28,15,15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
+
+
+
+
+
+
+
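+# Write_partial_block: spill the masked final block (v15) to the stack
+# and copy the remaining r12 output bytes one at a time.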
+Write_partial_block:
+ li 10, 192
+ stxvb16x 15+32, 10, 1
+
+
+ addi 10, 9, -1
+ addi 16, 1, 191
+
+ mtctr 12
+ li 15, 0
+
+Write_last_byte:
+ lbzu 14, 1(16)
+ stbu 14, 1(10)
+ bdnz Write_last_byte
+ blr
+
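+# aes_gcm_out: write the updated GHASH digest back through r8, combine
+# the byte counters into the r3 return value, restore the non-volatile
+# registers and return.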
+aes_gcm_out:
+
+ stxvb16x 32, 0, 8
+ add 3, 11, 12
+
+ li 9, 256
+ lvx 20, 9, 1
+ addi 9, 9, 16
+ lvx 21, 9, 1
+ addi 9, 9, 16
+ lvx 22, 9, 1
+ addi 9, 9, 16
+ lvx 23, 9, 1
+ addi 9, 9, 16
+ lvx 24, 9, 1
+ addi 9, 9, 16
+ lvx 25, 9, 1
+ addi 9, 9, 16
+ lvx 26, 9, 1
+ addi 9, 9, 16
+ lvx 27, 9, 1
+ addi 9, 9, 16
+ lvx 28, 9, 1
+ addi 9, 9, 16
+ lvx 29, 9, 1
+ addi 9, 9, 16
+ lvx 30, 9, 1
+ addi 9, 9, 16
+ lvx 31, 9, 1
+
+ ld 0, 528(1)
+ ld 14,112(1)
+ ld 15,120(1)
+ ld 16,128(1)
+ ld 17,136(1)
+ ld 18,144(1)
+ ld 19,152(1)
+ ld 20,160(1)
+ ld 21,168(1)
+
+ mtlr 0
+ addi 1, 1, 512
+ blr
+
+
+
+
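+# ppc_aes_gcm_decrypt: same argument layout as ppc_aes_gcm_encrypt; the
+# main difference is that GHASH is computed over the ciphertext that was
+# just read rather than over the freshly produced output.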
+.global ppc_aes_gcm_decrypt
+.align 5
+ppc_aes_gcm_decrypt:
+_ppc_aes_gcm_decrypt:
+
+ stdu 1,-512(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ li 9, 256
+ stvx 20, 9, 1
+ addi 9, 9, 16
+ stvx 21, 9, 1
+ addi 9, 9, 16
+ stvx 22, 9, 1
+ addi 9, 9, 16
+ stvx 23, 9, 1
+ addi 9, 9, 16
+ stvx 24, 9, 1
+ addi 9, 9, 16
+ stvx 25, 9, 1
+ addi 9, 9, 16
+ stvx 26, 9, 1
+ addi 9, 9, 16
+ stvx 27, 9, 1
+ addi 9, 9, 16
+ stvx 28, 9, 1
+ addi 9, 9, 16
+ stvx 29, 9, 1
+ addi 9, 9, 16
+ stvx 30, 9, 1
+ addi 9, 9, 16
+ stvx 31, 9, 1
+ std 0, 528(1)
+
+
+ lxvb16x 32, 0, 8
+
+
+ li 10, 32
+ lxvd2x 2+32, 10, 8
+ li 10, 48
+ lxvd2x 3+32, 10, 8
+ li 10, 64
+ lxvd2x 4+32, 10, 8
+ li 10, 80
+ lxvd2x 5+32, 10, 8
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8
+ li 10, 112
+ lxvd2x 7+32, 10, 8
+ li 10, 128
+ lxvd2x 8+32, 10, 8
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8
+ li 10, 160
+ lxvd2x 10+32, 10, 8
+ li 10, 176
+ lxvd2x 11+32, 10, 8
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8
+ li 10, 208
+ lxvd2x 13+32, 10, 8
+ li 10, 224
+ lxvd2x 14+32, 10, 8
+
+
+ lxvb16x 30+32, 0, 7
+
+ mr 12, 5
+ li 11, 0
+
+
+ vxor 31, 31, 31
+ vspltisb 22,1
+ vsldoi 31, 31, 22,1
+
+
+ lxv 0, 0(6)
+ lxv 1, 0x10(6)
+ lxv 2, 0x20(6)
+ lxv 3, 0x30(6)
+ lxv 4, 0x40(6)
+ lxv 5, 0x50(6)
+ lxv 6, 0x60(6)
+ lxv 7, 0x70(6)
+ lxv 8, 0x80(6)
+ lxv 9, 0x90(6)
+ lxv 10, 0xa0(6)
+
+
+ lwz 9,240(6)
+
+
+
+ xxlor 32+29, 0, 0
+ vxor 15, 30, 29
+
+ cmpdi 9, 10
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 11, 0xb0(6)
+ lxv 12, 0xc0(6)
+
+ cmpdi 9, 12
+ beq .Loop_aes_gcm_8x_dec
+
+
+ lxv 13, 0xd0(6)
+ lxv 14, 0xe0(6)
+ cmpdi 9, 14
+ beq .Loop_aes_gcm_8x_dec
+
+ b aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x_dec:
+ mr 14, 3
+ mr 9, 4
+
+
+ li 10, 128
+ divdu 10, 5, 10
+ cmpdi 10, 0
+ beq .Loop_last_block_dec
+
+ .long 0x13DEF8C0
+ vxor 16, 30, 29
+ .long 0x13DEF8C0
+ vxor 17, 30, 29
+ .long 0x13DEF8C0
+ vxor 18, 30, 29
+ .long 0x13DEF8C0
+ vxor 19, 30, 29
+ .long 0x13DEF8C0
+ vxor 20, 30, 29
+ .long 0x13DEF8C0
+ vxor 21, 30, 29
+ .long 0x13DEF8C0
+ vxor 22, 30, 29
+
+ mtctr 10
+
+ li 15, 16
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ lwz 10, 240(6)
+
+.Loop_8x_block_dec:
+
+ lxvb16x 15, 0, 14
+ lxvb16x 16, 15, 14
+ lxvb16x 17, 16, 14
+ lxvb16x 18, 17, 14
+ lxvb16x 19, 18, 14
+ lxvb16x 20, 19, 14
+ lxvb16x 21, 20, 14
+ lxvb16x 22, 21, 14
+ addi 14, 14, 128
+
+.Loop_aes_middle8x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_last_aes_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x1210BD08
+ .long 0x1231BD08
+ .long 0x1252BD08
+ .long 0x1273BD08
+ .long 0x1294BD08
+ .long 0x12B5BD08
+ .long 0x12D6BD08
+
+ .long 0x11EFC508
+ .long 0x1210C508
+ .long 0x1231C508
+ .long 0x1252C508
+ .long 0x1273C508
+ .long 0x1294C508
+ .long 0x12B5C508
+ .long 0x12D6C508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_last_aes_dec
+ b aes_gcm_out
+
+Do_last_aes_dec:
+
+
+
+ .long 0x11EFBD09
+ .long 0x1210BD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ xxlxor 48, 48, 16
+ stxvb16x 48, 15, 9
+
+ .long 0x1231BD09
+ .long 0x1252BD09
+
+ xxlxor 49, 49, 17
+ stxvb16x 49, 16, 9
+ xxlxor 50, 50, 18
+ stxvb16x 50, 17, 9
+
+ .long 0x1273BD09
+ .long 0x1294BD09
+
+ xxlxor 51, 51, 19
+ stxvb16x 51, 18, 9
+ xxlxor 52, 52, 20
+ stxvb16x 52, 19, 9
+
+ .long 0x12B5BD09
+ .long 0x12D6BD09
+
+ xxlxor 53, 53, 21
+ stxvb16x 53, 20, 9
+ xxlxor 54, 54, 22
+ stxvb16x 54, 21, 9
+
+ addi 9, 9, 128
+
+ xxlor 15+32, 15, 15
+ xxlor 16+32, 16, 16
+ xxlor 17+32, 17, 17
+ xxlor 18+32, 18, 18
+ xxlor 19+32, 19, 19
+ xxlor 20+32, 20, 20
+ xxlor 21+32, 21, 21
+ xxlor 22+32, 22, 22
+
+
+ ppc_aes_gcm_ghash2_4x
+
+ xxlor 27+32, 0, 0
+ .long 0x13DEF8C0
+ vor 29,30,30
+ vxor 15, 30, 27
+ .long 0x13DEF8C0
+ vxor 16, 30, 27
+ .long 0x13DEF8C0
+ vxor 17, 30, 27
+ .long 0x13DEF8C0
+ vxor 18, 30, 27
+ .long 0x13DEF8C0
+ vxor 19, 30, 27
+ .long 0x13DEF8C0
+ vxor 20, 30, 27
+ .long 0x13DEF8C0
+ vxor 21, 30, 27
+ .long 0x13DEF8C0
+ vxor 22, 30, 27
+ addi 12, 12, -128
+ addi 11, 11, 128
+
+ bdnz .Loop_8x_block_dec
+
+ vor 30,29,29
+
+.Loop_last_block_dec:
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+
+ li 10, 16
+ divdu 10, 12, 10
+
+ mtctr 10
+
+ lwz 10,240(6)
+
+ cmpdi 12, 16
+ blt Final_block_dec
+
+Next_rem_block_dec:
+ lxvb16x 15, 0, 14
+
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_next_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_next_1x_dec
+
+Do_next_1x_dec:
+ .long 0x11EFBD09
+
+ xxlxor 47, 47, 15
+ stxvb16x 47, 0, 9
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+ addi 12, 12, -16
+ addi 11, 11, 16
+ xxlor 19+32, 0, 0
+ .long 0x13DEF8C0
+ vxor 15, 30, 19
+
+ bdnz Next_rem_block_dec
+
+ cmpdi 12, 0
+ beq aes_gcm_out
+
+Final_block_dec:
+.Loop_aes_middle_1x
+
+ xxlor 23+32, 10, 10
+
+ cmpdi 10, 10
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 11, 11
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 12, 12
+
+ cmpdi 10, 12
+ beq Do_final_1x_dec
+
+
+ xxlor 24+32, 13, 13
+
+ .long 0x11EFBD08
+ .long 0x11EFC508
+
+ xxlor 23+32, 14, 14
+
+ cmpdi 10, 14
+ beq Do_final_1x_dec
+
+Do_final_1x_dec:
+ .long 0x11EFBD09
+
+ lxvb16x 15, 0, 14
+ xxlxor 47, 47, 15
+
+
+ li 15, 16
+ sub 15, 15, 12
+
+ vspltisb 16,-1
+ vspltisb 17,0
+ li 10, 192
+ stvx 16, 10, 1
+ addi 10, 10, 16
+ stvx 17, 10, 1
+
+ addi 10, 1, 192
+ lxvb16x 16, 15, 10
+ xxland 47, 47, 16
+
+ xxlor 28+32, 15, 15
+ ppc_update_hash_1x
+
+
+ bl Write_partial_block
+
+ b aes_gcm_out
diff --git a/sys/modules/ossl/Makefile b/sys/modules/ossl/Makefile
--- a/sys/modules/ossl/Makefile
+++ b/sys/modules/ossl/Makefile
@@ -25,7 +25,7 @@
sha256-armv4.S \
sha512-armv4.S \
ossl_arm.c \
- ossl_aes_gcm.c
+ ossl_aes_gcm_neon.c
SRCS.aarch64= \
chacha-armv8.S \
@@ -47,6 +47,7 @@
sha256-x86_64.S \
sha512-x86_64.S \
ossl_aes_gcm.c \
+ ossl_aes_gcm_avx512.c \
ossl_x86.c
SRCS.i386= \
@@ -59,6 +60,8 @@
ossl_x86.c
SRCS.powerpc64le= \
+ aes-gcm-ppc.S \
+ ossl_aes_gcm.c \
ossl_ppccap.c \
aes-ppc.S \
aesp8-ppc.S \
@@ -80,6 +83,8 @@
x25519-ppc64.S
SRCS.powerpc64= \
+ aes-gcm-ppc.S \
+ ossl_aes_gcm.c \
ossl_ppccap.c \
aes-ppc.S \
aesp8-ppc.S \
D44274: ossl: Add GCM support on powerpc64/powerpc64le (POWER8+)
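
The Makefile hunks above build the shared crypto/openssl/ossl_aes_gcm.c glue together with aes-gcm-ppc.S for both powerpc64 and powerpc64le. As a reading aid only, here is a minimal C sketch of how such a bulk routine might be declared and driven; the prototypes, the return-value meaning, and the wrapper are assumptions inferred from the register usage in the assembly (r3 = in, r4 = out, r5 = len, r6 = key schedule, r7 = counter block, r8 = Xi/hash table), not a copy of the actual FreeBSD or OpenSSL glue code.

/*
 * Illustrative sketch only: prototypes and semantics are assumptions
 * based on the register conventions visible in aes-gcm-ppc.S.
 */
#include <stddef.h>
#include <stdint.h>

/* Assumed prototypes for the two assembly entry points. */
size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out,
    size_t len, const void *key, unsigned char ivec[16], uint64_t *Xi);
size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out,
    size_t len, const void *key, unsigned char ivec[16], uint64_t *Xi);

/*
 * Hypothetical wrapper: hand whole 16-byte blocks to the assembly and
 * report how many bytes were consumed, leaving any tail to a byte-wise
 * fallback in the caller.
 */
static size_t
gcm_bulk_encrypt(const unsigned char *in, unsigned char *out, size_t len,
    const void *keysched, unsigned char ivec[16], uint64_t Xi[2])
{
	if (len < 16)
		return (0);		/* caller handles short inputs */
	return (ppc_aes_gcm_encrypt(in, out, len - (len % 16), keysched,
	    ivec, Xi));
}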