mirror of
				https://github.com/yuzu-emu/mbedtls.git
				synced 2025-11-04 15:25:03 +00:00 
			
		
		
		
	Put local variables in a struct
This way we can have a single call to mbedtls_platform_zeroize, which saves a few bytes of code size.

Additionally, on my PC, I notice a significant speed improvement (x86_64 build with MBEDTLS_AESNI_C disabled, gcc 5.4.0 -O3). I don't have an explanation for that (I expected no measurable difference).

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
		
							parent
							
								
									ee4d4eb625
								
							
						
					
					
						commit
						5197c6692c
					
				
							
								
								
									
										162
									
								
								library/aes.c
									
									
									
									
									
								
							
							
						
						
									
										162
									
								
								library/aes.c
									
									
									
									
									
								
							| 
						 | 
					@ -867,61 +867,56 @@ int mbedtls_internal_aes_encrypt( mbedtls_aes_context *ctx,
 | 
				
			||||||
                                  unsigned char output[16] )
 | 
					                                  unsigned char output[16] )
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3;
 | 
					    uint32_t *RK = ctx->rk;
 | 
				
			||||||
 | 
					    struct
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        uint32_t X[4];
 | 
				
			||||||
 | 
					        uint32_t Y[4];
 | 
				
			||||||
 | 
					    } t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RK = ctx->rk;
 | 
					    GET_UINT32_LE( t.X[0], input,  0 ); t.X[0] ^= *RK++;
 | 
				
			||||||
 | 
					    GET_UINT32_LE( t.X[1], input,  4 ); t.X[1] ^= *RK++;
 | 
				
			||||||
    GET_UINT32_LE( X0, input,  0 ); X0 ^= *RK++;
 | 
					    GET_UINT32_LE( t.X[2], input,  8 ); t.X[2] ^= *RK++;
 | 
				
			||||||
    GET_UINT32_LE( X1, input,  4 ); X1 ^= *RK++;
 | 
					    GET_UINT32_LE( t.X[3], input, 12 ); t.X[3] ^= *RK++;
 | 
				
			||||||
    GET_UINT32_LE( X2, input,  8 ); X2 ^= *RK++;
 | 
					 | 
				
			||||||
    GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- )
 | 
					    for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- )
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
 | 
					        AES_FROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] );
 | 
				
			||||||
        AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 );
 | 
					        AES_FROUND( t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3] );
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
 | 
					    AES_FROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X0 = *RK++ ^ \
 | 
					    t.X[0] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y0       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[0]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y1 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[1] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[2] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) FSb[ ( t.Y[3] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X1 = *RK++ ^ \
 | 
					    t.X[1] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y1       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[1]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y2 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[2] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[3] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) FSb[ ( t.Y[0] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X2 = *RK++ ^ \
 | 
					    t.X[2] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y2       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[2]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y3 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[3] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[0] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) FSb[ ( t.Y[1] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X3 = *RK++ ^ \
 | 
					    t.X[3] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y3       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[3]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y0 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[0] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) FSb[ ( t.Y[1] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) FSb[ ( t.Y[2] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PUT_UINT32_LE( X0, output,  0 );
 | 
					    PUT_UINT32_LE( t.X[0], output,  0 );
 | 
				
			||||||
    PUT_UINT32_LE( X1, output,  4 );
 | 
					    PUT_UINT32_LE( t.X[1], output,  4 );
 | 
				
			||||||
    PUT_UINT32_LE( X2, output,  8 );
 | 
					    PUT_UINT32_LE( t.X[2], output,  8 );
 | 
				
			||||||
    PUT_UINT32_LE( X3, output, 12 );
 | 
					    PUT_UINT32_LE( t.X[3], output, 12 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    mbedtls_platform_zeroize( &X0, sizeof( X0 ) );
 | 
					    mbedtls_platform_zeroize( &t, sizeof( t ) );
 | 
				
			||||||
    mbedtls_platform_zeroize( &X1, sizeof( X1 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &X2, sizeof( X2 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &X3, sizeof( X3 ) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y0, sizeof( Y0 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y1, sizeof( Y1 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y2, sizeof( Y2 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y3, sizeof( Y3 ) );
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return( 0 );
 | 
					    return( 0 );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -945,61 +940,56 @@ int mbedtls_internal_aes_decrypt( mbedtls_aes_context *ctx,
 | 
				
			||||||
                                  unsigned char output[16] )
 | 
					                                  unsigned char output[16] )
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
    uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3;
 | 
					    uint32_t *RK = ctx->rk;
 | 
				
			||||||
 | 
					    struct
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        uint32_t X[4];
 | 
				
			||||||
 | 
					        uint32_t Y[4];
 | 
				
			||||||
 | 
					    } t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RK = ctx->rk;
 | 
					    GET_UINT32_LE( t.X[0], input,  0 ); t.X[0] ^= *RK++;
 | 
				
			||||||
 | 
					    GET_UINT32_LE( t.X[1], input,  4 ); t.X[1] ^= *RK++;
 | 
				
			||||||
    GET_UINT32_LE( X0, input,  0 ); X0 ^= *RK++;
 | 
					    GET_UINT32_LE( t.X[2], input,  8 ); t.X[2] ^= *RK++;
 | 
				
			||||||
    GET_UINT32_LE( X1, input,  4 ); X1 ^= *RK++;
 | 
					    GET_UINT32_LE( t.X[3], input, 12 ); t.X[3] ^= *RK++;
 | 
				
			||||||
    GET_UINT32_LE( X2, input,  8 ); X2 ^= *RK++;
 | 
					 | 
				
			||||||
    GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- )
 | 
					    for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- )
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
 | 
					        AES_RROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] );
 | 
				
			||||||
        AES_RROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 );
 | 
					        AES_RROUND( t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3] );
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
 | 
					    AES_RROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X0 = *RK++ ^ \
 | 
					    t.X[0] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y0       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[0]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y3 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[3] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[2] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y1 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) RSb[ ( t.Y[1] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X1 = *RK++ ^ \
 | 
					    t.X[1] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y1       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[1]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y0 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[0] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[3] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y2 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) RSb[ ( t.Y[2] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X2 = *RK++ ^ \
 | 
					    t.X[2] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y2       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[2]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y1 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[1] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[0] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y3 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) RSb[ ( t.Y[3] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    X3 = *RK++ ^ \
 | 
					    t.X[3] = *RK++ ^ \
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y3       ) & 0xFF ]       ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[3]       ) & 0xFF ]       ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y2 >>  8 ) & 0xFF ] <<  8 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[2] >>  8 ) & 0xFF ] <<  8 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^
 | 
					            ( (uint32_t) RSb[ ( t.Y[1] >> 16 ) & 0xFF ] << 16 ) ^
 | 
				
			||||||
            ( (uint32_t) RSb[ ( Y0 >> 24 ) & 0xFF ] << 24 );
 | 
					            ( (uint32_t) RSb[ ( t.Y[0] >> 24 ) & 0xFF ] << 24 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PUT_UINT32_LE( X0, output,  0 );
 | 
					    PUT_UINT32_LE( t.X[0], output,  0 );
 | 
				
			||||||
    PUT_UINT32_LE( X1, output,  4 );
 | 
					    PUT_UINT32_LE( t.X[1], output,  4 );
 | 
				
			||||||
    PUT_UINT32_LE( X2, output,  8 );
 | 
					    PUT_UINT32_LE( t.X[2], output,  8 );
 | 
				
			||||||
    PUT_UINT32_LE( X3, output, 12 );
 | 
					    PUT_UINT32_LE( t.X[3], output, 12 );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    mbedtls_platform_zeroize( &X0, sizeof( X0 ) );
 | 
					    mbedtls_platform_zeroize( &t, sizeof( t ) );
 | 
				
			||||||
    mbedtls_platform_zeroize( &X1, sizeof( X1 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &X2, sizeof( X2 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &X3, sizeof( X3 ) );
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y0, sizeof( Y0 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y1, sizeof( Y1 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y2, sizeof( Y2 ) );
 | 
					 | 
				
			||||||
    mbedtls_platform_zeroize( &Y3, sizeof( Y3 ) );
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return( 0 );
 | 
					    return( 0 );
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue