From 2fd1bb8f02c711e047889e10177d7360d256204c Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 12 Nov 2015 16:38:31 +0200 Subject: [PATCH] Add option to use smaller AES tables (table sizes reduced by 6144 bytes) This patch adds the MBEDTLS_AES_SMALL_TABLES option to reduce the number of AES look-up tables and thus save 6 KiB of memory. Enabling MBEDTLS_AES_SMALL_TABLES causes a performance hit of ~7% on ARM and ~15% on x86-64. Benchmark on Cortex-A7 (armhf): Before: AES-CBC-128 : 14394 Kb/s, 0 cycles/byte AES-CBC-192 : 12442 Kb/s, 0 cycles/byte AES-CBC-256 : 10958 Kb/s, 0 cycles/byte After: AES-CBC-128 : 13342 Kb/s, 0 cycles/byte AES-CBC-192 : 11469 Kb/s, 0 cycles/byte AES-CBC-256 : 10058 Kb/s, 0 cycles/byte Benchmark on Intel Core i5-4570 (x86_64, 3.2 GHz, no turbo): Before: AES-CBC-128 : 215759 Kb/s, 14 cycles/byte AES-CBC-192 : 190884 Kb/s, 16 cycles/byte AES-CBC-256 : 171536 Kb/s, 18 cycles/byte After: AES-CBC-128 : 185108 Kb/s, 16 cycles/byte AES-CBC-192 : 162839 Kb/s, 19 cycles/byte AES-CBC-256 : 144700 Kb/s, 21 cycles/byte --- include/mbedtls/config.h | 9 +++ library/aes.c | 140 +++++++++++++++++++++++++------------ library/version_features.c | 3 + 3 files changed, 106 insertions(+), 46 deletions(-) diff --git a/include/mbedtls/config.h b/include/mbedtls/config.h index c4b8995c1..44def95b8 100644 --- a/include/mbedtls/config.h +++ b/include/mbedtls/config.h @@ -387,6 +387,15 @@ */ //#define MBEDTLS_AES_ROM_TABLES +/** + * \def MBEDTLS_AES_SMALL_TABLES + * + * Use less ROM/RAM for the AES implementation (saves about 6144 bytes). + * + * Uncomment this macro to use less memory for AES. 
+ */ +//#define MBEDTLS_AES_SMALL_TABLES + /** * \def MBEDTLS_CAMELLIA_SMALL_MEMORY * diff --git a/library/aes.c b/library/aes.c index 5e01c4f2b..aabacf9f8 100644 --- a/library/aes.c +++ b/library/aes.c @@ -201,6 +201,8 @@ static const unsigned char FSb[256] = static const uint32_t FT0[256] = { FT }; #undef V +#ifndef MBEDTLS_AES_SMALL_TABLES + #define V(a,b,c,d) 0x##b##c##d##a static const uint32_t FT1[256] = { FT }; #undef V @@ -213,6 +215,8 @@ static const uint32_t FT2[256] = { FT }; static const uint32_t FT3[256] = { FT }; #undef V +#endif /* !MBEDTLS_AES_SMALL_TABLES */ + #undef FT /* @@ -328,6 +332,8 @@ static const unsigned char RSb[256] = static const uint32_t RT0[256] = { RT }; #undef V +#ifndef MBEDTLS_AES_SMALL_TABLES + #define V(a,b,c,d) 0x##b##c##d##a static const uint32_t RT1[256] = { RT }; #undef V @@ -340,6 +346,8 @@ static const uint32_t RT2[256] = { RT }; static const uint32_t RT3[256] = { RT }; #undef V +#endif /* !MBEDTLS_AES_SMALL_TABLES */ + #undef RT /* @@ -359,18 +367,22 @@ static const uint32_t RCON[10] = */ static unsigned char FSb[256]; static uint32_t FT0[256]; +#ifndef MBEDTLS_AES_SMALL_TABLES static uint32_t FT1[256]; static uint32_t FT2[256]; static uint32_t FT3[256]; +#endif /* !MBEDTLS_AES_SMALL_TABLES */ /* * Reverse S-box & tables */ static unsigned char RSb[256]; static uint32_t RT0[256]; +#ifndef MBEDTLS_AES_SMALL_TABLES static uint32_t RT1[256]; static uint32_t RT2[256]; static uint32_t RT3[256]; +#endif /* !MBEDTLS_AES_SMALL_TABLES */ /* * Round constants @@ -445,9 +457,11 @@ static void aes_gen_tables( void ) ( (uint32_t) x << 16 ) ^ ( (uint32_t) z << 24 ); +#ifndef MBEDTLS_AES_SMALL_TABLES FT1[i] = ROTL8( FT0[i] ); FT2[i] = ROTL8( FT1[i] ); FT3[i] = ROTL8( FT2[i] ); +#endif /* !MBEDTLS_AES_SMALL_TABLES */ x = RSb[i]; @@ -456,14 +470,48 @@ static void aes_gen_tables( void ) ( (uint32_t) MUL( 0x0D, x ) << 16 ) ^ ( (uint32_t) MUL( 0x0B, x ) << 24 ); +#ifndef MBEDTLS_AES_SMALL_TABLES RT1[i] = ROTL8( RT0[i] ); RT2[i] = ROTL8( 
RT1[i] ); RT3[i] = ROTL8( RT2[i] ); +#endif /* !MBEDTLS_AES_SMALL_TABLES */ } } +#undef ROTL8 + #endif /* MBEDTLS_AES_ROM_TABLES */ +#ifdef MBEDTLS_AES_SMALL_TABLES + +#define ROTL8(x) ( (uint32_t)( ( x ) << 8 ) + (uint32_t)( ( x ) >> 24 ) ) +#define ROTL16(x) ( (uint32_t)( ( x ) << 16 ) + (uint32_t)( ( x ) >> 16 ) ) +#define ROTL24(x) ( (uint32_t)( ( x ) << 24 ) + (uint32_t)( ( x ) >> 8 ) ) + +#define AES_RT0(idx) RT0[idx] +#define AES_RT1(idx) ROTL8( RT0[idx] ) +#define AES_RT2(idx) ROTL16( RT0[idx] ) +#define AES_RT3(idx) ROTL24( RT0[idx] ) + +#define AES_FT0(idx) FT0[idx] +#define AES_FT1(idx) ROTL8( FT0[idx] ) +#define AES_FT2(idx) ROTL16( FT0[idx] ) +#define AES_FT3(idx) ROTL24( FT0[idx] ) + +#else /* MBEDTLS_AES_SMALL_TABLES */ + +#define AES_RT0(idx) RT0[idx] +#define AES_RT1(idx) RT1[idx] +#define AES_RT2(idx) RT2[idx] +#define AES_RT3(idx) RT3[idx] + +#define AES_FT0(idx) FT0[idx] +#define AES_FT1(idx) FT1[idx] +#define AES_FT2(idx) FT2[idx] +#define AES_FT3(idx) FT3[idx] + +#endif /* MBEDTLS_AES_SMALL_TABLES */ + void mbedtls_aes_init( mbedtls_aes_context *ctx ) { memset( ctx, 0, sizeof( mbedtls_aes_context ) ); @@ -641,10 +689,10 @@ int mbedtls_aes_setkey_dec( mbedtls_aes_context *ctx, const unsigned char *key, { for( j = 0; j < 4; j++, SK++ ) { - *RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^ - RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^ - RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^ - RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ]; + *RK++ = AES_RT0( FSb[ ( *SK ) & 0xFF ] ) ^ + AES_RT1( FSb[ ( *SK >> 8 ) & 0xFF ] ) ^ + AES_RT2( FSb[ ( *SK >> 16 ) & 0xFF ] ) ^ + AES_RT3( FSb[ ( *SK >> 24 ) & 0xFF ] ); } } @@ -660,50 +708,50 @@ exit: } #endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */ -#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ -{ \ - X0 = *RK++ ^ FT0[ ( Y0 ) & 0xFF ] ^ \ - FT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y3 >> 24 ) & 0xFF ]; \ - \ - X1 = *RK++ ^ FT0[ ( Y1 ) & 0xFF ] ^ \ - FT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y0 >> 24 ) 
& 0xFF ]; \ - \ - X2 = *RK++ ^ FT0[ ( Y2 ) & 0xFF ] ^ \ - FT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y1 >> 24 ) & 0xFF ]; \ - \ - X3 = *RK++ ^ FT0[ ( Y3 ) & 0xFF ] ^ \ - FT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ - FT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ - FT3[ ( Y2 >> 24 ) & 0xFF ]; \ +#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ +{ \ + X0 = *RK++ ^ AES_FT0( ( Y0 ) & 0xFF ) ^ \ + AES_FT1( ( Y1 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y2 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y3 >> 24 ) & 0xFF ); \ + \ + X1 = *RK++ ^ AES_FT0( ( Y1 ) & 0xFF ) ^ \ + AES_FT1( ( Y2 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y3 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y0 >> 24 ) & 0xFF ); \ + \ + X2 = *RK++ ^ AES_FT0( ( Y2 ) & 0xFF ) ^ \ + AES_FT1( ( Y3 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y0 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y1 >> 24 ) & 0xFF ); \ + \ + X3 = *RK++ ^ AES_FT0( ( Y3 ) & 0xFF ) ^ \ + AES_FT1( ( Y0 >> 8 ) & 0xFF ) ^ \ + AES_FT2( ( Y1 >> 16 ) & 0xFF ) ^ \ + AES_FT3( ( Y2 >> 24 ) & 0xFF ); \ } -#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ -{ \ - X0 = *RK++ ^ RT0[ ( Y0 ) & 0xFF ] ^ \ - RT1[ ( Y3 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y2 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y1 >> 24 ) & 0xFF ]; \ - \ - X1 = *RK++ ^ RT0[ ( Y1 ) & 0xFF ] ^ \ - RT1[ ( Y0 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y3 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y2 >> 24 ) & 0xFF ]; \ - \ - X2 = *RK++ ^ RT0[ ( Y2 ) & 0xFF ] ^ \ - RT1[ ( Y1 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y0 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y3 >> 24 ) & 0xFF ]; \ - \ - X3 = *RK++ ^ RT0[ ( Y3 ) & 0xFF ] ^ \ - RT1[ ( Y2 >> 8 ) & 0xFF ] ^ \ - RT2[ ( Y1 >> 16 ) & 0xFF ] ^ \ - RT3[ ( Y0 >> 24 ) & 0xFF ]; \ +#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ +{ \ + X0 = *RK++ ^ AES_RT0( ( Y0 ) & 0xFF ) ^ \ + AES_RT1( ( Y3 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y2 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y1 >> 24 ) & 0xFF ); \ + \ + X1 = *RK++ ^ AES_RT0( ( Y1 ) & 0xFF ) ^ \ + AES_RT1( ( Y0 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y3 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y2 >> 24 ) & 0xFF ); \ + \ + X2 = *RK++ ^ AES_RT0( ( Y2 ) & 0xFF ) ^ \ + 
AES_RT1( ( Y1 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y0 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y3 >> 24 ) & 0xFF ); \ + \ + X3 = *RK++ ^ AES_RT0( ( Y3 ) & 0xFF ) ^ \ + AES_RT1( ( Y2 >> 8 ) & 0xFF ) ^ \ + AES_RT2( ( Y1 >> 16 ) & 0xFF ) ^ \ + AES_RT3( ( Y0 >> 24 ) & 0xFF ); \ } /* diff --git a/library/version_features.c b/library/version_features.c index 9f97c7bc3..2b651996c 100644 --- a/library/version_features.c +++ b/library/version_features.c @@ -198,6 +198,9 @@ static const char *features[] = { #if defined(MBEDTLS_AES_ROM_TABLES) "MBEDTLS_AES_ROM_TABLES", #endif /* MBEDTLS_AES_ROM_TABLES */ +#if defined(MBEDTLS_AES_SMALL_TABLES) + "MBEDTLS_AES_SMALL_TABLES", +#endif /* MBEDTLS_AES_SMALL_TABLES */ #if defined(MBEDTLS_CAMELLIA_SMALL_MEMORY) "MBEDTLS_CAMELLIA_SMALL_MEMORY", #endif /* MBEDTLS_CAMELLIA_SMALL_MEMORY */