mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2025-01-20 22:01:08 +00:00
Implements AES and GCM with ARMv8 Crypto Extensions
A compact patch that provides AES and GCM implementations that utilize the ARMv8 Crypto Extensions. The config flag is MBEDTLS_ARMV8CE_AES_C, which is disabled by default as we don't do runtime checking for the feature. The new implementation lives in armv8ce_aes.c. Provides similar functionality to https://github.com/ARMmbed/mbedtls/pull/432 Thanks to Barry O'Rourke and others for that contribtion. Tested on a Cortex A53 device and QEMU. On a midrange phone the real AES-GCM throughput increases about 4x, while raw AES speed is up to 10x faster. When cross-compiling, you want to set something like: export CC='aarch64-linux-gnu-gcc' export CFLAGS='-Ofast -march=armv8-a+crypto' scripts/config.pl set MBEDTLS_ARMV8CE_AES_C QEMU seems to also need export LDFLAGS='-static' Then run normal make or cmake etc.
This commit is contained in:
parent
853f9bd65e
commit
dfb6015ca7
60
include/mbedtls/armv8ce_aes.h
Normal file
60
include/mbedtls/armv8ce_aes.h
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
/**
|
||||||
|
* \file armv8ce_aes.h
|
||||||
|
*
|
||||||
|
* \brief ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
|
||||||
|
*
|
||||||
|
* Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
* This file is part of mbed TLS (https://tls.mbed.org)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MBEDTLS_ARMV8CE_AES_H
|
||||||
|
#define MBEDTLS_ARMV8CE_AES_H
|
||||||
|
|
||||||
|
#include "aes.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief [ARMv8 Crypto Extensions] AES-ECB block en(de)cryption
|
||||||
|
*
|
||||||
|
* \param ctx AES context
|
||||||
|
* \param mode MBEDTLS_AES_ENCRYPT or MBEDTLS_AES_DECRYPT
|
||||||
|
* \param input 16-byte input block
|
||||||
|
* \param output 16-byte output block
|
||||||
|
*
|
||||||
|
* \return 0 on success (cannot fail)
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
|
||||||
|
int mode,
|
||||||
|
const unsigned char input[16],
|
||||||
|
unsigned char output[16] );
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief [ARMv8 Crypto Extensions] Multiply in GF(2^128) for GCM
|
||||||
|
*
|
||||||
|
* \param c Result
|
||||||
|
* \param a First operand
|
||||||
|
* \param b Second operand
|
||||||
|
*
|
||||||
|
* \note Both operands and result are bit strings interpreted as
|
||||||
|
* elements of GF(2^128) as per the GCM spec.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
|
||||||
|
const unsigned char a[16],
|
||||||
|
const unsigned char b[16] );
|
||||||
|
|
||||||
|
#endif /* MBEDTLS_ARMV8CE_AES_H */
|
143
library/armv8ce_aes.c
Normal file
143
library/armv8ce_aes.c
Normal file
|
@ -0,0 +1,143 @@
|
||||||
|
/*
|
||||||
|
* ARMv8 Cryptography Extensions -- Optimized code for AES and GCM
|
||||||
|
*
|
||||||
|
* Copyright (C) 2006-2017, ARM Limited, All Rights Reserved
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
* This file is part of mbed TLS (https://tls.mbed.org)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(MBEDTLS_CONFIG_FILE)
|
||||||
|
#include "mbedtls/config.h"
|
||||||
|
#else
|
||||||
|
#include MBEDTLS_CONFIG_FILE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(MBEDTLS_ARMV8CE_AES_C)
|
||||||
|
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#include "mbedtls/armv8ce_aes.h"
|
||||||
|
|
||||||
|
#ifndef asm
|
||||||
|
#define asm __asm
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* [ARMv8 Crypto Extensions] AES-ECB block en(de)cryption
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(MBEDTLS_AES_C)
|
||||||
|
|
||||||
|
int mbedtls_armv8ce_aes_crypt_ecb( mbedtls_aes_context *ctx,
|
||||||
|
int mode,
|
||||||
|
const unsigned char input[16],
|
||||||
|
unsigned char output[16] )
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
const uint8_t *rk;
|
||||||
|
uint8x16_t x, k;
|
||||||
|
|
||||||
|
x = vld1q_u8( input ); // input block
|
||||||
|
rk = (const uint8_t *) ctx->rk; // round keys
|
||||||
|
|
||||||
|
if( mode == MBEDTLS_AES_ENCRYPT )
|
||||||
|
{
|
||||||
|
for( i = ctx->nr - 1; i ; i-- ) // encryption loop
|
||||||
|
{
|
||||||
|
k = vld1q_u8( rk );
|
||||||
|
rk += 16;
|
||||||
|
x = vaeseq_u8( x, k );
|
||||||
|
x = vaesmcq_u8( x );
|
||||||
|
}
|
||||||
|
k = vld1q_u8( rk );
|
||||||
|
rk += 16;
|
||||||
|
x = vaeseq_u8( x, k );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for( i = ctx->nr - 1; i ; i-- ) // decryption loop
|
||||||
|
{
|
||||||
|
k = vld1q_u8( rk );
|
||||||
|
rk += 16;
|
||||||
|
x = vaesdq_u8( x, k );
|
||||||
|
x = vaesimcq_u8( x );
|
||||||
|
}
|
||||||
|
k = vld1q_u8( rk );
|
||||||
|
rk += 16;
|
||||||
|
x = vaesdq_u8( x, k );
|
||||||
|
}
|
||||||
|
|
||||||
|
k = vld1q_u8( rk ); // final key just XORed
|
||||||
|
x = veorq_u8( x, k );
|
||||||
|
vst1q_u8( output, x ); // write out
|
||||||
|
|
||||||
|
return ( 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* MBEDTLS_AES_C */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* [ARMv8 Crypto Extensions] Multiply in GF(2^128) for GCM
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(MBEDTLS_GCM_C)
|
||||||
|
|
||||||
|
void mbedtls_armv8ce_gcm_mult( unsigned char c[16],
|
||||||
|
const unsigned char a[16],
|
||||||
|
const unsigned char b[16] )
|
||||||
|
{
|
||||||
|
// GCM's GF(2^128) polynomial basis is x^128 + x^7 + x^2 + x + 1
|
||||||
|
const uint64x2_t base = { 0, 0x86 }; // note missing LS bit
|
||||||
|
|
||||||
|
register uint8x16_t vc asm( "v0" ); // named registers
|
||||||
|
register uint8x16_t va asm( "v1" ); // (to avoid conflict)
|
||||||
|
register uint8x16_t vb asm( "v2" );
|
||||||
|
register uint64x2_t vp asm( "v3" );
|
||||||
|
|
||||||
|
va = vld1q_u8( a ); // load inputs
|
||||||
|
vb = vld1q_u8( b );
|
||||||
|
vp = base;
|
||||||
|
|
||||||
|
asm (
|
||||||
|
"rbit %1.16b, %1.16b \n\t" // reverse bit order
|
||||||
|
"rbit %2.16b, %2.16b \n\t"
|
||||||
|
"pmull2 %0.1q, %1.2d, %2.2d \n\t" // v0 = a.hi * b.hi
|
||||||
|
"pmull2 v4.1q, %0.2d, %3.2d \n\t" // mul v0 by x^64, reduce
|
||||||
|
"ext %0.16b, %0.16b, %0.16b, #8 \n\t"
|
||||||
|
"eor %0.16b, %0.16b, v4.16b \n\t"
|
||||||
|
"ext v5.16b, %2.16b, %2.16b, #8 \n\t" // (swap hi and lo in b)
|
||||||
|
"pmull v4.1q, %1.1d, v5.1d \n\t" // v0 ^= a.lo * b.hi
|
||||||
|
"eor %0.16b, %0.16b, v4.16b \n\t"
|
||||||
|
"pmull2 v4.1q, %1.2d, v5.2d \n\t" // v0 ^= a.hi * b.lo
|
||||||
|
"eor %0.16b, %0.16b, v4.16b \n\t"
|
||||||
|
"pmull2 v4.1q, %0.2d, %3.2d \n\t" // mul v0 by x^64, reduce
|
||||||
|
"ext %0.16b, %0.16b, %0.16b, #8 \n\t"
|
||||||
|
"eor %0.16b, %0.16b, v4.16b \n\t"
|
||||||
|
"pmull v4.1q, %1.1d, %2.1d \n\t" // v0 ^= a.lo * b.lo
|
||||||
|
"eor %0.16b, %0.16b, v4.16b \n\t"
|
||||||
|
"rbit %0.16b, %0.16b \n\t" // reverse bits for output
|
||||||
|
: "=w" (vc) // q0: output
|
||||||
|
: "w" (va), "w" (vb), "w" (vp) // q1, q2: input
|
||||||
|
: "v4", "v5" // q4, q5: clobbered
|
||||||
|
);
|
||||||
|
|
||||||
|
vst1q_u8( c, vc ); // write out
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* MBEDTLS_GCM_C */
|
||||||
|
|
||||||
|
#endif /* MBEDTLS_ARMV8CE_AES_C */
|
||||||
|
|
Loading…
Reference in a new issue