From 72c172a13d017548f811aebc144bacb971ff490d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Mon, 30 Dec 2013 16:04:55 +0100 Subject: [PATCH 1/5] Save some small memory allocations inside ecp_mul() --- library/ecp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/library/ecp.c b/library/ecp.c index 8c5c06e69..5cde22d69 100644 --- a/library/ecp.c +++ b/library/ecp.c @@ -915,6 +915,8 @@ cleanup: * due to the choice of precomputed points in the modified comb method. * So branches for these cases do not leak secret information. * + * We accept Q->Z being unset (saving memory in tables) as meaning 1. + * * Cost: 1A := 8M + 3S */ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R, @@ -933,13 +935,13 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R, if( mpi_cmp_int( &P->Z, 0 ) == 0 ) return( ecp_copy( R, Q ) ); - if( mpi_cmp_int( &Q->Z, 0 ) == 0 ) + if( Q->Z.p != NULL && mpi_cmp_int( &Q->Z, 0 ) == 0 ) return( ecp_copy( R, P ) ); /* * Make sure Q coordinates are normalized */ - if( mpi_cmp_int( &Q->Z, 1 ) != 0 ) + if( Q->Z.p != NULL && mpi_cmp_int( &Q->Z, 1 ) != 0 ) return( POLARSSL_ERR_ECP_BAD_INPUT_DATA ); mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 ); mpi_init( &T4 ); @@ -1240,9 +1242,6 @@ static int ecp_select_comb( const ecp_group *grp, ecp_point *R, MPI_CHK( mpi_safe_cond_assign( &R->Y, &T[j].Y, j == ii ) ); } - /* The Z coordinate is always 1 */ - MPI_CHK( mpi_lset( &R->Z, 1 ) ); - /* Safely invert result if i is "negative" */ MPI_CHK( ecp_safe_invert_jac( grp, R, i >> 7 ) ); @@ -1271,6 +1270,7 @@ static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R, /* Start with a non-zero point and randomize its coordinates */ i = d; MPI_CHK( ecp_select_comb( grp, R, T, t_len, x[i] ) ); + MPI_CHK( mpi_lset( &R->Z, 1 ) ); if( f_rng != 0 ) MPI_CHK( ecp_randomize_jac( grp, R, f_rng, p_rng ) ); From 1f789b83485a1fe7e13470b87948f2829a47c595 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Mon, 30 Dec 2013 17:31:56 +0100 Subject: [PATCH 2/5] Lessen peak memory usage in EC by freeing earlier Cuts peak usage by 25% :) --- library/ecp.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/library/ecp.c b/library/ecp.c index 5cde22d69..c3397a4f8 100644 --- a/library/ecp.c +++ b/library/ecp.c @@ -791,7 +791,16 @@ static int ecp_normalize_jac_many( const ecp_group *grp, MPI_CHK( mpi_mul_mpi( &T[i]->X, &T[i]->X, &ZZi ) ); MOD_MUL( T[i]->X ); MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &ZZi ) ); MOD_MUL( T[i]->Y ); MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &Zi ) ); MOD_MUL( T[i]->Y ); - MPI_CHK( mpi_lset( &T[i]->Z, 1 ) ); + + /* + * Post-processing: reclaim some memory by shrinking coordinates + * - not storing Z (always 1) + * - shrinking other coordinates, but still keeping the same number of + * limbs as P, as otherwise it will too likely be regrown too fast. + */ + mpi_shrink( &T[i]->X, grp->P.n ); + mpi_shrink( &T[i]->Y, grp->P.n ); + mpi_free( &T[i]->Z ); if( i == 0 ) break; @@ -1205,19 +1214,6 @@ static int ecp_precompute_comb( const ecp_group *grp, ecp_normalize_jac_many( grp, TT, k ); - /* - * Post-precessing: reclaim some memory by - * - not storing Z (always 1) - * - shrinking other coordinates - * Keep the same number of limbs as P to avoid re-growing on next use. 
- */ - for( i = 0; i < ( 1U << (w-1) ); i++ ) - { - mpi_free( &T[i].Z ); - mpi_shrink( &T[i].X, grp->P.n ); - mpi_shrink( &T[i].Y, grp->P.n ); - } - cleanup: return( ret ); } From 70896a023e663989a34bdbc5255f719d6e352b8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Mon, 30 Dec 2013 18:06:41 +0100 Subject: [PATCH 3/5] Add statistics about number of allocated blocks --- library/memory_buffer_alloc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/library/memory_buffer_alloc.c b/library/memory_buffer_alloc.c index 7ec6498de..1038c85a9 100644 --- a/library/memory_buffer_alloc.c +++ b/library/memory_buffer_alloc.c @@ -77,6 +77,7 @@ typedef struct size_t total_used; size_t maximum_used; size_t header_count; + size_t maximum_header_count; #endif #if defined(POLARSSL_THREADING_C) threading_mutex_t mutex; @@ -335,6 +336,8 @@ static void *buffer_alloc_malloc( size_t len ) #if defined(POLARSSL_MEMORY_DEBUG) heap.header_count++; + if( heap.header_count > heap.maximum_header_count ) + heap.maximum_header_count = heap.header_count; heap.total_used += cur->size; if( heap.total_used > heap.maximum_used) heap.maximum_used = heap.total_used; @@ -484,8 +487,11 @@ int memory_buffer_alloc_verify() void memory_buffer_alloc_status() { fprintf( stderr, - "Current use: %u blocks / %u bytes, max: %u bytes, malloc / free: %u / %u\n", - heap.header_count, heap.total_used, heap.maximum_used, + "Current use: %u blocks / %u bytes, max: %u blocks / %u bytes (total %u bytes), malloc / free: %u / %u\n", + heap.header_count, heap.total_used, + heap.maximum_header_count, heap.maximum_used, + heap.maximum_header_count * sizeof( memory_header ) + + heap.maximum_used, heap.malloc_count, heap.free_count ); if( heap.first->next == NULL ) From 9e4191c3e79cb934ea871eacf33d45d4b0eb71ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Mon, 30 Dec 2013 18:41:16 +0100 Subject: [PATCH 4/5] Add another option 
to reduce EC memory usage Also document speed/memory trade-offs better. --- include/polarssl/config.h | 1 + include/polarssl/ecp.h | 26 ++++++++++++++++++++++++-- library/ecp.c | 7 ++++++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/polarssl/config.h b/include/polarssl/config.h index d98bdb34f..16643dcaf 100644 --- a/include/polarssl/config.h +++ b/include/polarssl/config.h @@ -1874,6 +1874,7 @@ // #define POLARSSL_ECP_MAX_BITS 521 /**< Maximum bit size of groups */ #define POLARSSL_ECP_WINDOW_SIZE 6 /**< Maximum window size used */ +#define POLARSSL_ECP_FIXED_POINT_OPTIM 1 /**< Enable fixed-point speed-up */ // Entropy options // diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h index ff1072e0c..23351a742 100644 --- a/include/polarssl/ecp.h +++ b/include/polarssl/ecp.h @@ -178,11 +178,33 @@ ecp_keypair; * Minimum value: 2. Maximum value: 7. * * Result is an array of at most ( 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) ) - * points used for point multiplication. + * points used for point multiplication. This value is directly tied to EC + * peak memory usage, so decreasing it by one should roughly cut memory usage + * by two (if large curves are in use). * - * Reduction in size may reduce speed for big curves. + * Reduction in size may reduce speed, but larger curves are impacted first. + * Sample performances (in ECDHE handshakes/s, with FIXED_POINT_OPTIM = 1): + * w-size: 6 5 4 3 2 + * 521 145 141 135 120 97 + * 384 214 209 198 177 146 + * 256 320 320 303 262 226 + * 224 475 475 453 398 342 + * 192 640 640 633 587 476 */ #define POLARSSL_ECP_WINDOW_SIZE 6 /**< Maximum window size used */ + +/* + * Trade memory for speed on fixed-point multiplication. + * + * This speeds up repeated multiplication of the generator (that is, the + * multiplication in ECDSA signatures, and half of the multiplications in + * ECDSA verification and ECDHE) by a factor roughly 3 to 4. 
+ * + * The cost is increasing EC peak memory usage by a factor roughly 2. + * + * Change this value to 0 to reduce peak memory usage. + */ +#define POLARSSL_ECP_FIXED_POINT_OPTIM 1 /**< Enable fixed-point speed-up */ #endif /* diff --git a/library/ecp.c b/library/ecp.c index c3397a4f8..58b5d752e 100644 --- a/library/ecp.c +++ b/library/ecp.c @@ -1315,12 +1315,17 @@ static int ecp_mul_comb( ecp_group *grp, ecp_point *R, /* * If P == G, pre-compute a bit more, since this may be re-used later. - * Just adding one ups the cost of the first mul by at most 3%. + * Just adding one avoids upping the cost of the first mul too much, + * and the memory cost too. */ +#if POLARSSL_ECP_FIXED_POINT_OPTIM == 1 p_eq_g = ( mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 && mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 ); if( p_eq_g ) w++; +#else + p_eq_g = 0; +#endif /* * Make sure w is within bounds. From 26bc1c0f5d2d7a966aec9da5488afa57505f9bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Mon, 30 Dec 2013 19:33:33 +0100 Subject: [PATCH 5/5] Fix a few unchecked return codes in EC --- library/ecp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/library/ecp.c b/library/ecp.c index 58b5d752e..3adab15fd 100644 --- a/library/ecp.c +++ b/library/ecp.c @@ -798,8 +798,8 @@ static int ecp_normalize_jac_many( const ecp_group *grp, * - shrinking other coordinates, but still keeping the same number of * limbs as P, as otherwise it will too likely be regrown too fast. 
*/ - mpi_shrink( &T[i]->X, grp->P.n ); - mpi_shrink( &T[i]->Y, grp->P.n ); + MPI_CHK( mpi_shrink( &T[i]->X, grp->P.n ) ); + MPI_CHK( mpi_shrink( &T[i]->Y, grp->P.n ) ); mpi_free( &T[i]->Z ); if( i == 0 ) @@ -1036,7 +1036,7 @@ int ecp_sub( const ecp_group *grp, ecp_point *R, return( POLARSSL_ERR_ECP_FEATURE_UNAVAILABLE ); /* mQ = - Q */ - ecp_copy( &mQ, Q ); + MPI_CHK( ecp_copy( &mQ, Q ) ); if( mpi_cmp_int( &mQ.Y, 0 ) != 0 ) MPI_CHK( mpi_sub_mpi( &mQ.Y, &grp->P, &mQ.Y ) ); @@ -1195,7 +1195,7 @@ static int ecp_precompute_comb( const ecp_group *grp, TT[k++] = cur; } - ecp_normalize_jac_many( grp, TT, k ); + MPI_CHK( ecp_normalize_jac_many( grp, TT, k ) ); /* * Compute the remaining ones using the minimal number of additions @@ -1207,12 +1207,12 @@ static int ecp_precompute_comb( const ecp_group *grp, j = i; while( j-- ) { - ecp_add_mixed( grp, &T[i + j], &T[j], &T[i] ); + MPI_CHK( ecp_add_mixed( grp, &T[i + j], &T[j], &T[i] ) ); TT[k++] = &T[i + j]; } } - ecp_normalize_jac_many( grp, TT, k ); + MPI_CHK( ecp_normalize_jac_many( grp, TT, k ) ); cleanup: return( ret );