mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2025-01-09 01:05:30 +00:00
Merged ECP memory usage optimizations
This commit is contained in:
commit
c73879139e
|
@ -19,6 +19,7 @@ Changes
|
||||||
* More constant-time checks in the RSA module
|
* More constant-time checks in the RSA module
|
||||||
* Split off curves from ecp.c into ecp_curves.c
|
* Split off curves from ecp.c into ecp_curves.c
|
||||||
* Curves are now stored fully in ROM
|
* Curves are now stored fully in ROM
|
||||||
|
* Memory usage optimizations in ECP module
|
||||||
|
|
||||||
Bugfix
|
Bugfix
|
||||||
* Fixed bug in mpi_set_bit() on platforms where t_uint is wider than int
|
* Fixed bug in mpi_set_bit() on platforms where t_uint is wider than int
|
||||||
|
|
|
@ -1890,6 +1890,7 @@
|
||||||
//
|
//
|
||||||
#define POLARSSL_ECP_MAX_BITS 521 /**< Maximum bit size of groups */
|
#define POLARSSL_ECP_MAX_BITS 521 /**< Maximum bit size of groups */
|
||||||
#define POLARSSL_ECP_WINDOW_SIZE 6 /**< Maximum window size used */
|
#define POLARSSL_ECP_WINDOW_SIZE 6 /**< Maximum window size used */
|
||||||
|
#define POLARSSL_ECP_FIXED_POINT_OPTIM 1 /**< Enable fixed-point speed-up */
|
||||||
|
|
||||||
// Entropy options
|
// Entropy options
|
||||||
//
|
//
|
||||||
|
|
|
@ -178,11 +178,33 @@ ecp_keypair;
|
||||||
* Minimum value: 2. Maximum value: 7.
|
* Minimum value: 2. Maximum value: 7.
|
||||||
*
|
*
|
||||||
* Result is an array of at most ( 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) )
|
* Result is an array of at most ( 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) )
|
||||||
* points used for point multiplication.
|
* points used for point multiplication. This value is directly tied to EC
|
||||||
|
* peak memory usage, so decreasing it by one should roughly divide memory usage
|
||||||
|
* by two (if large curves are in use).
|
||||||
*
|
*
|
||||||
* Reduction in size may reduce speed for big curves.
|
* Reduction in size may reduce speed, but larger curves are impacted first.
|
||||||
|
* Sample performances (in ECDHE handshakes/s, with FIXED_POINT_OPTIM = 1):
|
||||||
|
* w-size: 6 5 4 3 2
|
||||||
|
* 521 145 141 135 120 97
|
||||||
|
* 384 214 209 198 177 146
|
||||||
|
* 256 320 320 303 262 226
|
||||||
|
* 224 475 475 453 398 342
|
||||||
|
* 192 640 640 633 587 476
|
||||||
*/
|
*/
|
||||||
#define POLARSSL_ECP_WINDOW_SIZE 6 /**< Maximum window size used */
|
#define POLARSSL_ECP_WINDOW_SIZE 6 /**< Maximum window size used */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Trade memory for speed on fixed-point multiplication.
|
||||||
|
*
|
||||||
|
* This speeds up repeated multiplication of the generator (that is, the
|
||||||
|
* multiplication in ECDSA signatures, and half of the multiplications in
|
||||||
|
* ECDSA verification and ECDHE) by a factor of roughly 3 to 4.
|
||||||
|
*
|
||||||
|
* The cost is increasing EC peak memory usage by a factor of roughly 2.
|
||||||
|
*
|
||||||
|
* Change this value to 0 to reduce peak memory usage.
|
||||||
|
*/
|
||||||
|
#define POLARSSL_ECP_FIXED_POINT_OPTIM 1 /**< Enable fixed-point speed-up */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -791,7 +791,16 @@ static int ecp_normalize_jac_many( const ecp_group *grp,
|
||||||
MPI_CHK( mpi_mul_mpi( &T[i]->X, &T[i]->X, &ZZi ) ); MOD_MUL( T[i]->X );
|
MPI_CHK( mpi_mul_mpi( &T[i]->X, &T[i]->X, &ZZi ) ); MOD_MUL( T[i]->X );
|
||||||
MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &ZZi ) ); MOD_MUL( T[i]->Y );
|
MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &ZZi ) ); MOD_MUL( T[i]->Y );
|
||||||
MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &Zi ) ); MOD_MUL( T[i]->Y );
|
MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &Zi ) ); MOD_MUL( T[i]->Y );
|
||||||
MPI_CHK( mpi_lset( &T[i]->Z, 1 ) );
|
|
||||||
|
/*
|
||||||
|
* Post-processing: reclaim some memory by shrinking coordinates
|
||||||
|
* - not storing Z (always 1)
|
||||||
|
* - shrinking other coordinates, but still keeping the same number of
|
||||||
|
* limbs as P, as otherwise it will too likely be regrown too fast.
|
||||||
|
*/
|
||||||
|
MPI_CHK( mpi_shrink( &T[i]->X, grp->P.n ) );
|
||||||
|
MPI_CHK( mpi_shrink( &T[i]->Y, grp->P.n ) );
|
||||||
|
mpi_free( &T[i]->Z );
|
||||||
|
|
||||||
if( i == 0 )
|
if( i == 0 )
|
||||||
break;
|
break;
|
||||||
|
@ -915,6 +924,8 @@ cleanup:
|
||||||
* due to the choice of precomputed points in the modified comb method.
|
* due to the choice of precomputed points in the modified comb method.
|
||||||
* So branches for these cases do not leak secret information.
|
* So branches for these cases do not leak secret information.
|
||||||
*
|
*
|
||||||
|
* We accept Q->Z being unset (saving memory in tables) as meaning 1.
|
||||||
|
*
|
||||||
* Cost: 1A := 8M + 3S
|
* Cost: 1A := 8M + 3S
|
||||||
*/
|
*/
|
||||||
static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
|
static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
|
||||||
|
@ -933,13 +944,13 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
|
||||||
if( mpi_cmp_int( &P->Z, 0 ) == 0 )
|
if( mpi_cmp_int( &P->Z, 0 ) == 0 )
|
||||||
return( ecp_copy( R, Q ) );
|
return( ecp_copy( R, Q ) );
|
||||||
|
|
||||||
if( mpi_cmp_int( &Q->Z, 0 ) == 0 )
|
if( Q->Z.p != NULL && mpi_cmp_int( &Q->Z, 0 ) == 0 )
|
||||||
return( ecp_copy( R, P ) );
|
return( ecp_copy( R, P ) );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure Q coordinates are normalized
|
* Make sure Q coordinates are normalized
|
||||||
*/
|
*/
|
||||||
if( mpi_cmp_int( &Q->Z, 1 ) != 0 )
|
if( Q->Z.p != NULL && mpi_cmp_int( &Q->Z, 1 ) != 0 )
|
||||||
return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
|
return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
|
||||||
|
|
||||||
mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 ); mpi_init( &T4 );
|
mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 ); mpi_init( &T4 );
|
||||||
|
@ -1025,7 +1036,7 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
|
||||||
return( POLARSSL_ERR_ECP_FEATURE_UNAVAILABLE );
|
return( POLARSSL_ERR_ECP_FEATURE_UNAVAILABLE );
|
||||||
|
|
||||||
/* mQ = - Q */
|
/* mQ = - Q */
|
||||||
ecp_copy( &mQ, Q );
|
MPI_CHK( ecp_copy( &mQ, Q ) );
|
||||||
if( mpi_cmp_int( &mQ.Y, 0 ) != 0 )
|
if( mpi_cmp_int( &mQ.Y, 0 ) != 0 )
|
||||||
MPI_CHK( mpi_sub_mpi( &mQ.Y, &grp->P, &mQ.Y ) );
|
MPI_CHK( mpi_sub_mpi( &mQ.Y, &grp->P, &mQ.Y ) );
|
||||||
|
|
||||||
|
@ -1184,7 +1195,7 @@ static int ecp_precompute_comb( const ecp_group *grp,
|
||||||
TT[k++] = cur;
|
TT[k++] = cur;
|
||||||
}
|
}
|
||||||
|
|
||||||
ecp_normalize_jac_many( grp, TT, k );
|
MPI_CHK( ecp_normalize_jac_many( grp, TT, k ) );
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Compute the remaining ones using the minimal number of additions
|
* Compute the remaining ones using the minimal number of additions
|
||||||
|
@ -1196,25 +1207,12 @@ static int ecp_precompute_comb( const ecp_group *grp,
|
||||||
j = i;
|
j = i;
|
||||||
while( j-- )
|
while( j-- )
|
||||||
{
|
{
|
||||||
ecp_add_mixed( grp, &T[i + j], &T[j], &T[i] );
|
MPI_CHK( ecp_add_mixed( grp, &T[i + j], &T[j], &T[i] ) );
|
||||||
TT[k++] = &T[i + j];
|
TT[k++] = &T[i + j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ecp_normalize_jac_many( grp, TT, k );
|
MPI_CHK( ecp_normalize_jac_many( grp, TT, k ) );
|
||||||
|
|
||||||
/*
|
|
||||||
* Post-precessing: reclaim some memory by
|
|
||||||
* - not storing Z (always 1)
|
|
||||||
* - shrinking other coordinates
|
|
||||||
* Keep the same number of limbs as P to avoid re-growing on next use.
|
|
||||||
*/
|
|
||||||
for( i = 0; i < ( 1U << (w-1) ); i++ )
|
|
||||||
{
|
|
||||||
mpi_free( &T[i].Z );
|
|
||||||
mpi_shrink( &T[i].X, grp->P.n );
|
|
||||||
mpi_shrink( &T[i].Y, grp->P.n );
|
|
||||||
}
|
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
return( ret );
|
return( ret );
|
||||||
|
@ -1240,9 +1238,6 @@ static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
|
||||||
MPI_CHK( mpi_safe_cond_assign( &R->Y, &T[j].Y, j == ii ) );
|
MPI_CHK( mpi_safe_cond_assign( &R->Y, &T[j].Y, j == ii ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The Z coordinate is always 1 */
|
|
||||||
MPI_CHK( mpi_lset( &R->Z, 1 ) );
|
|
||||||
|
|
||||||
/* Safely invert result if i is "negative" */
|
/* Safely invert result if i is "negative" */
|
||||||
MPI_CHK( ecp_safe_invert_jac( grp, R, i >> 7 ) );
|
MPI_CHK( ecp_safe_invert_jac( grp, R, i >> 7 ) );
|
||||||
|
|
||||||
|
@ -1271,6 +1266,7 @@ static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
|
||||||
/* Start with a non-zero point and randomize its coordinates */
|
/* Start with a non-zero point and randomize its coordinates */
|
||||||
i = d;
|
i = d;
|
||||||
MPI_CHK( ecp_select_comb( grp, R, T, t_len, x[i] ) );
|
MPI_CHK( ecp_select_comb( grp, R, T, t_len, x[i] ) );
|
||||||
|
MPI_CHK( mpi_lset( &R->Z, 1 ) );
|
||||||
if( f_rng != 0 )
|
if( f_rng != 0 )
|
||||||
MPI_CHK( ecp_randomize_jac( grp, R, f_rng, p_rng ) );
|
MPI_CHK( ecp_randomize_jac( grp, R, f_rng, p_rng ) );
|
||||||
|
|
||||||
|
@ -1319,12 +1315,17 @@ static int ecp_mul_comb( ecp_group *grp, ecp_point *R,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If P == G, pre-compute a bit more, since this may be re-used later.
|
* If P == G, pre-compute a bit more, since this may be re-used later.
|
||||||
* Just adding one ups the cost of the first mul by at most 3%.
|
* Just adding one avoids upping the cost of the first mul too much,
|
||||||
|
* and the memory cost too.
|
||||||
*/
|
*/
|
||||||
|
#if POLARSSL_ECP_FIXED_POINT_OPTIM == 1
|
||||||
p_eq_g = ( mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
|
p_eq_g = ( mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
|
||||||
mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
|
mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
|
||||||
if( p_eq_g )
|
if( p_eq_g )
|
||||||
w++;
|
w++;
|
||||||
|
#else
|
||||||
|
p_eq_g = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure w is within bounds.
|
* Make sure w is within bounds.
|
||||||
|
|
|
@ -77,6 +77,7 @@ typedef struct
|
||||||
size_t total_used;
|
size_t total_used;
|
||||||
size_t maximum_used;
|
size_t maximum_used;
|
||||||
size_t header_count;
|
size_t header_count;
|
||||||
|
size_t maximum_header_count;
|
||||||
#endif
|
#endif
|
||||||
#if defined(POLARSSL_THREADING_C)
|
#if defined(POLARSSL_THREADING_C)
|
||||||
threading_mutex_t mutex;
|
threading_mutex_t mutex;
|
||||||
|
@ -335,6 +336,8 @@ static void *buffer_alloc_malloc( size_t len )
|
||||||
|
|
||||||
#if defined(POLARSSL_MEMORY_DEBUG)
|
#if defined(POLARSSL_MEMORY_DEBUG)
|
||||||
heap.header_count++;
|
heap.header_count++;
|
||||||
|
if( heap.header_count > heap.maximum_header_count )
|
||||||
|
heap.maximum_header_count = heap.header_count;
|
||||||
heap.total_used += cur->size;
|
heap.total_used += cur->size;
|
||||||
if( heap.total_used > heap.maximum_used)
|
if( heap.total_used > heap.maximum_used)
|
||||||
heap.maximum_used = heap.total_used;
|
heap.maximum_used = heap.total_used;
|
||||||
|
@ -484,8 +487,11 @@ int memory_buffer_alloc_verify()
|
||||||
void memory_buffer_alloc_status()
|
void memory_buffer_alloc_status()
|
||||||
{
|
{
|
||||||
fprintf( stderr,
|
fprintf( stderr,
|
||||||
"Current use: %u blocks / %u bytes, max: %u bytes, malloc / free: %u / %u\n",
|
"Current use: %u blocks / %u bytes, max: %u blocks / %u bytes (total %u bytes), malloc / free: %u / %u\n",
|
||||||
heap.header_count, heap.total_used, heap.maximum_used,
|
heap.header_count, heap.total_used,
|
||||||
|
heap.maximum_header_count, heap.maximum_used,
|
||||||
|
heap.maximum_header_count * sizeof( memory_header )
|
||||||
|
+ heap.maximum_used,
|
||||||
heap.malloc_count, heap.free_count );
|
heap.malloc_count, heap.free_count );
|
||||||
|
|
||||||
if( heap.first->next == NULL )
|
if( heap.first->next == NULL )
|
||||||
|
|
Loading…
Reference in a new issue