mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2024-12-23 15:05:41 +00:00
Add option to use smaller AES tables (table sizes reduced by 6144 bytes)
This patch adds the MBEDTLS_AES_SMALL_TABLES option to reduce the number of AES look-up tables and thus save 6 KiB of memory. Enabling this option causes a performance hit of ~7% on ARM and ~15% on x86-64.
Benchmark on Cortex-A7 (armhf):
Before:
AES-CBC-128 : 14394 Kb/s, 0 cycles/byte
AES-CBC-192 : 12442 Kb/s, 0 cycles/byte
AES-CBC-256 : 10958 Kb/s, 0 cycles/byte
After:
AES-CBC-128 : 13342 Kb/s, 0 cycles/byte
AES-CBC-192 : 11469 Kb/s, 0 cycles/byte
AES-CBC-256 : 10058 Kb/s, 0 cycles/byte
Benchmark on Intel Core i5-4570 (x86_64, 3.2 GHz, no turbo):
Before:
AES-CBC-128 : 215759 Kb/s, 14 cycles/byte
AES-CBC-192 : 190884 Kb/s, 16 cycles/byte
AES-CBC-256 : 171536 Kb/s, 18 cycles/byte
After:
AES-CBC-128 : 185108 Kb/s, 16 cycles/byte
AES-CBC-192 : 162839 Kb/s, 19 cycles/byte
AES-CBC-256 : 144700 Kb/s, 21 cycles/byte
This commit is contained in:
parent
ddc6e52cc1
commit
2fd1bb8f02
|
@ -387,6 +387,15 @@
|
|||
*/
|
||||
//#define MBEDTLS_AES_ROM_TABLES
|
||||
|
||||
/**
|
||||
* \def MBEDTLS_AES_SMALL_TABLES
|
||||
*
|
||||
* Use less ROM/RAM for the AES implementation (saves about 6144 bytes).
|
||||
*
|
||||
* Uncomment this macro to use less memory for AES.
|
||||
*/
|
||||
//#define MBEDTLS_AES_SMALL_TABLES
|
||||
|
||||
/**
|
||||
* \def MBEDTLS_CAMELLIA_SMALL_MEMORY
|
||||
*
|
||||
|
|
140
library/aes.c
140
library/aes.c
|
@ -201,6 +201,8 @@ static const unsigned char FSb[256] =
|
|||
static const uint32_t FT0[256] = { FT };
|
||||
#undef V
|
||||
|
||||
#ifndef MBEDTLS_AES_SMALL_TABLES
|
||||
|
||||
#define V(a,b,c,d) 0x##b##c##d##a
|
||||
static const uint32_t FT1[256] = { FT };
|
||||
#undef V
|
||||
|
@ -213,6 +215,8 @@ static const uint32_t FT2[256] = { FT };
|
|||
static const uint32_t FT3[256] = { FT };
|
||||
#undef V
|
||||
|
||||
#endif /* !MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
#undef FT
|
||||
|
||||
/*
|
||||
|
@ -328,6 +332,8 @@ static const unsigned char RSb[256] =
|
|||
static const uint32_t RT0[256] = { RT };
|
||||
#undef V
|
||||
|
||||
#ifndef MBEDTLS_AES_SMALL_TABLES
|
||||
|
||||
#define V(a,b,c,d) 0x##b##c##d##a
|
||||
static const uint32_t RT1[256] = { RT };
|
||||
#undef V
|
||||
|
@ -340,6 +346,8 @@ static const uint32_t RT2[256] = { RT };
|
|||
static const uint32_t RT3[256] = { RT };
|
||||
#undef V
|
||||
|
||||
#endif /* !MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
#undef RT
|
||||
|
||||
/*
|
||||
|
@ -359,18 +367,22 @@ static const uint32_t RCON[10] =
|
|||
*/
|
||||
static unsigned char FSb[256];
|
||||
static uint32_t FT0[256];
|
||||
#ifndef MBEDTLS_AES_SMALL_TABLES
|
||||
static uint32_t FT1[256];
|
||||
static uint32_t FT2[256];
|
||||
static uint32_t FT3[256];
|
||||
#endif /* !MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
/*
|
||||
* Reverse S-box & tables
|
||||
*/
|
||||
static unsigned char RSb[256];
|
||||
static uint32_t RT0[256];
|
||||
#ifndef MBEDTLS_AES_SMALL_TABLES
|
||||
static uint32_t RT1[256];
|
||||
static uint32_t RT2[256];
|
||||
static uint32_t RT3[256];
|
||||
#endif /* !MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
/*
|
||||
* Round constants
|
||||
|
@ -445,9 +457,11 @@ static void aes_gen_tables( void )
|
|||
( (uint32_t) x << 16 ) ^
|
||||
( (uint32_t) z << 24 );
|
||||
|
||||
#ifndef MBEDTLS_AES_SMALL_TABLES
|
||||
FT1[i] = ROTL8( FT0[i] );
|
||||
FT2[i] = ROTL8( FT1[i] );
|
||||
FT3[i] = ROTL8( FT2[i] );
|
||||
#endif /* !MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
x = RSb[i];
|
||||
|
||||
|
@ -456,14 +470,48 @@ static void aes_gen_tables( void )
|
|||
( (uint32_t) MUL( 0x0D, x ) << 16 ) ^
|
||||
( (uint32_t) MUL( 0x0B, x ) << 24 );
|
||||
|
||||
#ifndef MBEDTLS_AES_SMALL_TABLES
|
||||
RT1[i] = ROTL8( RT0[i] );
|
||||
RT2[i] = ROTL8( RT1[i] );
|
||||
RT3[i] = ROTL8( RT2[i] );
|
||||
#endif /* !MBEDTLS_AES_SMALL_TABLES */
|
||||
}
|
||||
}
|
||||
|
||||
#undef ROTL8
|
||||
|
||||
#endif /* MBEDTLS_AES_ROM_TABLES */
|
||||
|
||||
#ifdef MBEDTLS_AES_SMALL_TABLES
|
||||
|
||||
#define ROTL8(x) ( (uint32_t)( ( x ) << 8 ) + (uint32_t)( ( x ) >> 24 ) )
|
||||
#define ROTL16(x) ( (uint32_t)( ( x ) << 16 ) + (uint32_t)( ( x ) >> 16 ) )
|
||||
#define ROTL24(x) ( (uint32_t)( ( x ) << 24 ) + (uint32_t)( ( x ) >> 8 ) )
|
||||
|
||||
#define AES_RT0(idx) RT0[idx]
|
||||
#define AES_RT1(idx) ROTL8( RT0[idx] )
|
||||
#define AES_RT2(idx) ROTL16( RT0[idx] )
|
||||
#define AES_RT3(idx) ROTL24( RT0[idx] )
|
||||
|
||||
#define AES_FT0(idx) FT0[idx]
|
||||
#define AES_FT1(idx) ROTL8( FT0[idx] )
|
||||
#define AES_FT2(idx) ROTL16( FT0[idx] )
|
||||
#define AES_FT3(idx) ROTL24( FT0[idx] )
|
||||
|
||||
#else /* MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
#define AES_RT0(idx) RT0[idx]
|
||||
#define AES_RT1(idx) RT1[idx]
|
||||
#define AES_RT2(idx) RT2[idx]
|
||||
#define AES_RT3(idx) RT3[idx]
|
||||
|
||||
#define AES_FT0(idx) FT0[idx]
|
||||
#define AES_FT1(idx) FT1[idx]
|
||||
#define AES_FT2(idx) FT2[idx]
|
||||
#define AES_FT3(idx) FT3[idx]
|
||||
|
||||
#endif /* MBEDTLS_AES_SMALL_TABLES */
|
||||
|
||||
void mbedtls_aes_init( mbedtls_aes_context *ctx )
|
||||
{
|
||||
memset( ctx, 0, sizeof( mbedtls_aes_context ) );
|
||||
|
@ -641,10 +689,10 @@ int mbedtls_aes_setkey_dec( mbedtls_aes_context *ctx, const unsigned char *key,
|
|||
{
|
||||
for( j = 0; j < 4; j++, SK++ )
|
||||
{
|
||||
*RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^
|
||||
RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^
|
||||
RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^
|
||||
RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ];
|
||||
*RK++ = AES_RT0( FSb[ ( *SK ) & 0xFF ] ) ^
|
||||
AES_RT1( FSb[ ( *SK >> 8 ) & 0xFF ] ) ^
|
||||
AES_RT2( FSb[ ( *SK >> 16 ) & 0xFF ] ) ^
|
||||
AES_RT3( FSb[ ( *SK >> 24 ) & 0xFF ] );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -660,50 +708,50 @@ exit:
|
|||
}
|
||||
#endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */
|
||||
|
||||
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
|
||||
{ \
|
||||
X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \
|
||||
FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \
|
||||
FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \
|
||||
FT3[ ( Y3 >> 24 ) & 0xFF ]; \
|
||||
\
|
||||
X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \
|
||||
FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \
|
||||
FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \
|
||||
FT3[ ( Y0 >> 24 ) & 0xFF ]; \
|
||||
\
|
||||
X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \
|
||||
FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \
|
||||
FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \
|
||||
FT3[ ( Y1 >> 24 ) & 0xFF ]; \
|
||||
\
|
||||
X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \
|
||||
FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \
|
||||
FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \
|
||||
FT3[ ( Y2 >> 24 ) & 0xFF ]; \
|
||||
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
|
||||
{ \
|
||||
X0 = *RK++ ^ AES_FT0( ( Y0 ) & 0xFF ) ^ \
|
||||
AES_FT1( ( Y1 >> 8 ) & 0xFF ) ^ \
|
||||
AES_FT2( ( Y2 >> 16 ) & 0xFF ) ^ \
|
||||
AES_FT3( ( Y3 >> 24 ) & 0xFF ); \
|
||||
\
|
||||
X1 = *RK++ ^ AES_FT0( ( Y1 ) & 0xFF ) ^ \
|
||||
AES_FT1( ( Y2 >> 8 ) & 0xFF ) ^ \
|
||||
AES_FT2( ( Y3 >> 16 ) & 0xFF ) ^ \
|
||||
AES_FT3( ( Y0 >> 24 ) & 0xFF ); \
|
||||
\
|
||||
X2 = *RK++ ^ AES_FT0( ( Y2 ) & 0xFF ) ^ \
|
||||
AES_FT1( ( Y3 >> 8 ) & 0xFF ) ^ \
|
||||
AES_FT2( ( Y0 >> 16 ) & 0xFF ) ^ \
|
||||
AES_FT3( ( Y1 >> 24 ) & 0xFF ); \
|
||||
\
|
||||
X3 = *RK++ ^ AES_FT0( ( Y3 ) & 0xFF ) ^ \
|
||||
AES_FT1( ( Y0 >> 8 ) & 0xFF ) ^ \
|
||||
AES_FT2( ( Y1 >> 16 ) & 0xFF ) ^ \
|
||||
AES_FT3( ( Y2 >> 24 ) & 0xFF ); \
|
||||
}
|
||||
|
||||
#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
|
||||
{ \
|
||||
X0 = *RK++ ^ RT0[ ( Y0 ) & 0xFF ] ^ \
|
||||
RT1[ ( Y3 >> 8 ) & 0xFF ] ^ \
|
||||
RT2[ ( Y2 >> 16 ) & 0xFF ] ^ \
|
||||
RT3[ ( Y1 >> 24 ) & 0xFF ]; \
|
||||
\
|
||||
X1 = *RK++ ^ RT0[ ( Y1 ) & 0xFF ] ^ \
|
||||
RT1[ ( Y0 >> 8 ) & 0xFF ] ^ \
|
||||
RT2[ ( Y3 >> 16 ) & 0xFF ] ^ \
|
||||
RT3[ ( Y2 >> 24 ) & 0xFF ]; \
|
||||
\
|
||||
X2 = *RK++ ^ RT0[ ( Y2 ) & 0xFF ] ^ \
|
||||
RT1[ ( Y1 >> 8 ) & 0xFF ] ^ \
|
||||
RT2[ ( Y0 >> 16 ) & 0xFF ] ^ \
|
||||
RT3[ ( Y3 >> 24 ) & 0xFF ]; \
|
||||
\
|
||||
X3 = *RK++ ^ RT0[ ( Y3 ) & 0xFF ] ^ \
|
||||
RT1[ ( Y2 >> 8 ) & 0xFF ] ^ \
|
||||
RT2[ ( Y1 >> 16 ) & 0xFF ] ^ \
|
||||
RT3[ ( Y0 >> 24 ) & 0xFF ]; \
|
||||
#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
|
||||
{ \
|
||||
X0 = *RK++ ^ AES_RT0( ( Y0 ) & 0xFF ) ^ \
|
||||
AES_RT1( ( Y3 >> 8 ) & 0xFF ) ^ \
|
||||
AES_RT2( ( Y2 >> 16 ) & 0xFF ) ^ \
|
||||
AES_RT3( ( Y1 >> 24 ) & 0xFF ); \
|
||||
\
|
||||
X1 = *RK++ ^ AES_RT0( ( Y1 ) & 0xFF ) ^ \
|
||||
AES_RT1( ( Y0 >> 8 ) & 0xFF ) ^ \
|
||||
AES_RT2( ( Y3 >> 16 ) & 0xFF ) ^ \
|
||||
AES_RT3( ( Y2 >> 24 ) & 0xFF ); \
|
||||
\
|
||||
X2 = *RK++ ^ AES_RT0( ( Y2 ) & 0xFF ) ^ \
|
||||
AES_RT1( ( Y1 >> 8 ) & 0xFF ) ^ \
|
||||
AES_RT2( ( Y0 >> 16 ) & 0xFF ) ^ \
|
||||
AES_RT3( ( Y3 >> 24 ) & 0xFF ); \
|
||||
\
|
||||
X3 = *RK++ ^ AES_RT0( ( Y3 ) & 0xFF ) ^ \
|
||||
AES_RT1( ( Y2 >> 8 ) & 0xFF ) ^ \
|
||||
AES_RT2( ( Y1 >> 16 ) & 0xFF ) ^ \
|
||||
AES_RT3( ( Y0 >> 24 ) & 0xFF ); \
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -198,6 +198,9 @@ static const char *features[] = {
|
|||
#if defined(MBEDTLS_AES_ROM_TABLES)
|
||||
"MBEDTLS_AES_ROM_TABLES",
|
||||
#endif /* MBEDTLS_AES_ROM_TABLES */
|
||||
#if defined(MBEDTLS_AES_SMALL_TABLES)
|
||||
"MBEDTLS_AES_SMALL_TABLES",
|
||||
#endif /* MBEDTLS_AES_SMALL_TABLES */
|
||||
#if defined(MBEDTLS_CAMELLIA_SMALL_MEMORY)
|
||||
"MBEDTLS_CAMELLIA_SMALL_MEMORY",
|
||||
#endif /* MBEDTLS_CAMELLIA_SMALL_MEMORY */
|
||||
|
|
Loading…
Reference in a new issue