Mirror of https://github.com/yuzu-emu/mbedtls.git (synced 2025-01-03 17:35:30 +00:00)
aesni: Unroll aes rounds
This commit is contained in:
Parent: 3ef96cc1ad
Commit: 37d6376968
|
@ -117,14 +117,90 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
|
||||||
|
|
||||||
if (mode == MBEDTLS_AES_ENCRYPT)
|
if (mode == MBEDTLS_AES_ENCRYPT)
|
||||||
{
|
{
|
||||||
for (i = ctx->nr - 1; i; --i)
|
if (ctx->nr == 10) {
|
||||||
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
} else if (ctx->nr == 12) {
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
} else if (ctx->nr == 14) {
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
} else {
|
||||||
|
for (i = ctx->nr - 1; i; --i)
|
||||||
|
a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
}
|
||||||
a = _mm_aesenclast_si128( a, _mm_loadu_si128( rk ) );
|
a = _mm_aesenclast_si128( a, _mm_loadu_si128( rk ) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (i = ctx->nr - 1; i; --i)
|
if (ctx->nr == 10) {
|
||||||
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
} else if (ctx->nr == 12) {
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
} else if (ctx->nr == 14) {
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
} else {
|
||||||
|
for (i = ctx->nr - 1; i; --i)
|
||||||
|
a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) );
|
||||||
|
}
|
||||||
a = _mm_aesdeclast_si128( a, _mm_loadu_si128( rk ) );
|
a = _mm_aesdeclast_si128( a, _mm_loadu_si128( rk ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue