Add option to use smaller AES tables (table sizes reduced by 6144 bytes)

This patch adds MBEDTLS_AES_SMALL_TABLES option to reduce number of AES
look-up tables and thus save 6 KiB of memory. Enabling this option
cause performance hit MBEDTLS_AES_SMALL_TABLES of ~7% on ARM and ~15%
on x86-64.

Benchmark on Cortex-A7 (armhf):

Before:
  AES-CBC-128              :      14394 Kb/s,          0 cycles/byte
  AES-CBC-192              :      12442 Kb/s,          0 cycles/byte
  AES-CBC-256              :      10958 Kb/s,          0 cycles/byte

After:
  AES-CBC-128              :      13342 Kb/s,          0 cycles/byte
  AES-CBC-192              :      11469 Kb/s,          0 cycles/byte
  AES-CBC-256              :      10058 Kb/s,          0 cycles/byte

Benchmark on Intel Core i5-4570 (x86_64, 3.2 Ghz, no turbo):

Before:
  AES-CBC-128              :     215759 Kb/s,         14 cycles/byte
  AES-CBC-192              :     190884 Kb/s,         16 cycles/byte
  AES-CBC-256              :     171536 Kb/s,         18 cycles/byte

After:
  AES-CBC-128              :     185108 Kb/s,         16 cycles/byte
  AES-CBC-192              :     162839 Kb/s,         19 cycles/byte
  AES-CBC-256              :     144700 Kb/s,         21 cycles/byte
This commit is contained in:
Jussi Kivilinna 2015-11-12 16:38:31 +02:00 committed by Hanno Becker
parent ddc6e52cc1
commit 2fd1bb8f02
3 changed files with 106 additions and 46 deletions

View file

@ -387,6 +387,15 @@
*/
//#define MBEDTLS_AES_ROM_TABLES
/**
* \def MBEDTLS_AES_SMALL_TABLES
*
* Use less ROM/RAM for the AES implementation (saves about 6144 bytes).
*
* Uncomment this macro to use less memory for AES.
*/
//#define MBEDTLS_AES_SMALL_TABLES
/**
* \def MBEDTLS_CAMELLIA_SMALL_MEMORY
*

View file

@ -201,6 +201,8 @@ static const unsigned char FSb[256] =
static const uint32_t FT0[256] = { FT };
#undef V
#ifndef MBEDTLS_AES_SMALL_TABLES
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t FT1[256] = { FT };
#undef V
@ -213,6 +215,8 @@ static const uint32_t FT2[256] = { FT };
static const uint32_t FT3[256] = { FT };
#undef V
#endif /* !MBEDTLS_AES_SMALL_TABLES */
#undef FT
/*
@ -328,6 +332,8 @@ static const unsigned char RSb[256] =
static const uint32_t RT0[256] = { RT };
#undef V
#ifndef MBEDTLS_AES_SMALL_TABLES
#define V(a,b,c,d) 0x##b##c##d##a
static const uint32_t RT1[256] = { RT };
#undef V
@ -340,6 +346,8 @@ static const uint32_t RT2[256] = { RT };
static const uint32_t RT3[256] = { RT };
#undef V
#endif /* !MBEDTLS_AES_SMALL_TABLES */
#undef RT
/*
@ -359,18 +367,22 @@ static const uint32_t RCON[10] =
*/
static unsigned char FSb[256];
static uint32_t FT0[256];
#ifndef MBEDTLS_AES_SMALL_TABLES
static uint32_t FT1[256];
static uint32_t FT2[256];
static uint32_t FT3[256];
#endif /* !MBEDTLS_AES_SMALL_TABLES */
/*
* Reverse S-box & tables
*/
static unsigned char RSb[256];
static uint32_t RT0[256];
#ifndef MBEDTLS_AES_SMALL_TABLES
static uint32_t RT1[256];
static uint32_t RT2[256];
static uint32_t RT3[256];
#endif /* !MBEDTLS_AES_SMALL_TABLES */
/*
* Round constants
@ -445,9 +457,11 @@ static void aes_gen_tables( void )
( (uint32_t) x << 16 ) ^
( (uint32_t) z << 24 );
#ifndef MBEDTLS_AES_SMALL_TABLES
FT1[i] = ROTL8( FT0[i] );
FT2[i] = ROTL8( FT1[i] );
FT3[i] = ROTL8( FT2[i] );
#endif /* !MBEDTLS_AES_SMALL_TABLES */
x = RSb[i];
@ -456,14 +470,48 @@ static void aes_gen_tables( void )
( (uint32_t) MUL( 0x0D, x ) << 16 ) ^
( (uint32_t) MUL( 0x0B, x ) << 24 );
#ifndef MBEDTLS_AES_SMALL_TABLES
RT1[i] = ROTL8( RT0[i] );
RT2[i] = ROTL8( RT1[i] );
RT3[i] = ROTL8( RT2[i] );
#endif /* !MBEDTLS_AES_SMALL_TABLES */
}
}
#undef ROTL8
#endif /* MBEDTLS_AES_ROM_TABLES */
#ifdef MBEDTLS_AES_SMALL_TABLES
#define ROTL8(x) ( (uint32_t)( ( x ) << 8 ) + (uint32_t)( ( x ) >> 24 ) )
#define ROTL16(x) ( (uint32_t)( ( x ) << 16 ) + (uint32_t)( ( x ) >> 16 ) )
#define ROTL24(x) ( (uint32_t)( ( x ) << 24 ) + (uint32_t)( ( x ) >> 8 ) )
#define AES_RT0(idx) RT0[idx]
#define AES_RT1(idx) ROTL8( RT0[idx] )
#define AES_RT2(idx) ROTL16( RT0[idx] )
#define AES_RT3(idx) ROTL24( RT0[idx] )
#define AES_FT0(idx) FT0[idx]
#define AES_FT1(idx) ROTL8( FT0[idx] )
#define AES_FT2(idx) ROTL16( FT0[idx] )
#define AES_FT3(idx) ROTL24( FT0[idx] )
#else /* MBEDTLS_AES_SMALL_TABLES */
#define AES_RT0(idx) RT0[idx]
#define AES_RT1(idx) RT1[idx]
#define AES_RT2(idx) RT2[idx]
#define AES_RT3(idx) RT3[idx]
#define AES_FT0(idx) FT0[idx]
#define AES_FT1(idx) FT1[idx]
#define AES_FT2(idx) FT2[idx]
#define AES_FT3(idx) FT3[idx]
#endif /* MBEDTLS_AES_SMALL_TABLES */
void mbedtls_aes_init( mbedtls_aes_context *ctx )
{
memset( ctx, 0, sizeof( mbedtls_aes_context ) );
@ -641,10 +689,10 @@ int mbedtls_aes_setkey_dec( mbedtls_aes_context *ctx, const unsigned char *key,
{
for( j = 0; j < 4; j++, SK++ )
{
*RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^
RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^
RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^
RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ];
*RK++ = AES_RT0( FSb[ ( *SK ) & 0xFF ] ) ^
AES_RT1( FSb[ ( *SK >> 8 ) & 0xFF ] ) ^
AES_RT2( FSb[ ( *SK >> 16 ) & 0xFF ] ) ^
AES_RT3( FSb[ ( *SK >> 24 ) & 0xFF ] );
}
}
@ -660,50 +708,50 @@ exit:
}
#endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \
FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \
FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \
FT3[ ( Y3 >> 24 ) & 0xFF ]; \
\
X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \
FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \
FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \
FT3[ ( Y0 >> 24 ) & 0xFF ]; \
\
X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \
FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \
FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \
FT3[ ( Y1 >> 24 ) & 0xFF ]; \
\
X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \
FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \
FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \
FT3[ ( Y2 >> 24 ) & 0xFF ]; \
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = *RK++ ^ AES_FT0( ( Y0 ) & 0xFF ) ^ \
AES_FT1( ( Y1 >> 8 ) & 0xFF ) ^ \
AES_FT2( ( Y2 >> 16 ) & 0xFF ) ^ \
AES_FT3( ( Y3 >> 24 ) & 0xFF ); \
\
X1 = *RK++ ^ AES_FT0( ( Y1 ) & 0xFF ) ^ \
AES_FT1( ( Y2 >> 8 ) & 0xFF ) ^ \
AES_FT2( ( Y3 >> 16 ) & 0xFF ) ^ \
AES_FT3( ( Y0 >> 24 ) & 0xFF ); \
\
X2 = *RK++ ^ AES_FT0( ( Y2 ) & 0xFF ) ^ \
AES_FT1( ( Y3 >> 8 ) & 0xFF ) ^ \
AES_FT2( ( Y0 >> 16 ) & 0xFF ) ^ \
AES_FT3( ( Y1 >> 24 ) & 0xFF ); \
\
X3 = *RK++ ^ AES_FT0( ( Y3 ) & 0xFF ) ^ \
AES_FT1( ( Y0 >> 8 ) & 0xFF ) ^ \
AES_FT2( ( Y1 >> 16 ) & 0xFF ) ^ \
AES_FT3( ( Y2 >> 24 ) & 0xFF ); \
}
#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = *RK++ ^ RT0[ ( Y0 ) & 0xFF ] ^ \
RT1[ ( Y3 >> 8 ) & 0xFF ] ^ \
RT2[ ( Y2 >> 16 ) & 0xFF ] ^ \
RT3[ ( Y1 >> 24 ) & 0xFF ]; \
\
X1 = *RK++ ^ RT0[ ( Y1 ) & 0xFF ] ^ \
RT1[ ( Y0 >> 8 ) & 0xFF ] ^ \
RT2[ ( Y3 >> 16 ) & 0xFF ] ^ \
RT3[ ( Y2 >> 24 ) & 0xFF ]; \
\
X2 = *RK++ ^ RT0[ ( Y2 ) & 0xFF ] ^ \
RT1[ ( Y1 >> 8 ) & 0xFF ] ^ \
RT2[ ( Y0 >> 16 ) & 0xFF ] ^ \
RT3[ ( Y3 >> 24 ) & 0xFF ]; \
\
X3 = *RK++ ^ RT0[ ( Y3 ) & 0xFF ] ^ \
RT1[ ( Y2 >> 8 ) & 0xFF ] ^ \
RT2[ ( Y1 >> 16 ) & 0xFF ] ^ \
RT3[ ( Y0 >> 24 ) & 0xFF ]; \
#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = *RK++ ^ AES_RT0( ( Y0 ) & 0xFF ) ^ \
AES_RT1( ( Y3 >> 8 ) & 0xFF ) ^ \
AES_RT2( ( Y2 >> 16 ) & 0xFF ) ^ \
AES_RT3( ( Y1 >> 24 ) & 0xFF ); \
\
X1 = *RK++ ^ AES_RT0( ( Y1 ) & 0xFF ) ^ \
AES_RT1( ( Y0 >> 8 ) & 0xFF ) ^ \
AES_RT2( ( Y3 >> 16 ) & 0xFF ) ^ \
AES_RT3( ( Y2 >> 24 ) & 0xFF ); \
\
X2 = *RK++ ^ AES_RT0( ( Y2 ) & 0xFF ) ^ \
AES_RT1( ( Y1 >> 8 ) & 0xFF ) ^ \
AES_RT2( ( Y0 >> 16 ) & 0xFF ) ^ \
AES_RT3( ( Y3 >> 24 ) & 0xFF ); \
\
X3 = *RK++ ^ AES_RT0( ( Y3 ) & 0xFF ) ^ \
AES_RT1( ( Y2 >> 8 ) & 0xFF ) ^ \
AES_RT2( ( Y1 >> 16 ) & 0xFF ) ^ \
AES_RT3( ( Y0 >> 24 ) & 0xFF ); \
}
/*

View file

@ -198,6 +198,9 @@ static const char *features[] = {
#if defined(MBEDTLS_AES_ROM_TABLES)
"MBEDTLS_AES_ROM_TABLES",
#endif /* MBEDTLS_AES_ROM_TABLES */
#if defined(MBEDTLS_AES_SMALL_TABLES)
"MBEDTLS_AES_SMALL_TABLES",
#endif /* MBEDTLS_AES_SMALL_TABLES */
#if defined(MBEDTLS_CAMELLIA_SMALL_MEMORY)
"MBEDTLS_CAMELLIA_SMALL_MEMORY",
#endif /* MBEDTLS_CAMELLIA_SMALL_MEMORY */