From 56941fe6a2f73aed8c114c191b16bc4ca6a01229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Mon, 17 Feb 2020 11:04:33 +0100 Subject: [PATCH] Fix possible close_notify/ClientHello confusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ssl-opt.sh test cases using session resumption tend to fail occasionally on the CI due to a race condition in how ssl_server2 and ssl_client2 handle the reconnection cycle. The server does the following in order: - S1 send application data - S2 send a close_notify alert - S3 close the client socket - S4 wait for a "new connection" (actually a new datagram) - S5 start a handshake The client does the following in order: - C1 wait for and read application data from the server - C2 send a close_notify alert - C3 close the server socket - C4 reset session data and re-open a server socket - C5 start a handshake If the client has been able to send the close_notify (C2) and if has been delivered to the server before if closes the client socket (S3), when the server reaches S4, the datagram that we start the new connection will be the ClientHello and everything will be fine. However if S3 wins the race and happens before the close_notify is delivered, in S4 the close_notify is what will be seen as the first datagram in a new connection, and then in S5 this will rightfully be rejected as not being a valid ClientHello and the server will close the connection (and go wait for another one). The client will then fail to read from the socket and exit non-zero and the ssl-opt.sh harness will correctly report this as a failure. In order to avoid this race condition in test using ssl_client2 and ssl_server2, this commits introduces a new command-line option skip_close_notify to ssl_client2 and uses it in all ssl-opt.sh tests that use session resumption with DTLS and ssl_server2. This works because ssl_server2 knows how many messages it expects in each direction and in what order, and closes the connection after that rather than relying on close_notify (which is also why there was a race in the first place). Tests that use another server (in practice there are two of them, using OpenSSL as a server) wouldn't work with skip_close_notify, as the server won't close the connection until the client sends a close_notify, but for the same reason they don't need it (there is no race between receiving close_notify and closing as the former is the cause of the later). An alternative approach would be to make ssl_server2 keep the connection open until it receives a close_notify. Unfortunately it creates problems for tests where we simulate a lossy network, as the close_notify could be lost (and the client can't retransmit it). We could modify udp_proxy with an option to never drop alert messages, but when TLS 1.3 comes that would no longer work as the type of messages will be encrypted. Signed-off-by: Manuel Pégourié-Gonnard --- programs/ssl/ssl_client2.c | 33 +++++++++++++++++++++++++++++---- tests/ssl-opt.sh | 38 +++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/programs/ssl/ssl_client2.c b/programs/ssl/ssl_client2.c index c188900b4..ab72f15d9 100644 --- a/programs/ssl/ssl_client2.c +++ b/programs/ssl/ssl_client2.c @@ -145,6 +145,7 @@ int main( void ) #define DFL_REPRODUCIBLE 0 #define DFL_NSS_KEYLOG 0 #define DFL_NSS_KEYLOG_FILE NULL +#define DFL_SKIP_CLOSE_NOTIFY 0 #define GET_REQUEST "GET %s HTTP/1.0\r\nExtra-header: " #define GET_REQUEST_END "\r\n\r\n" @@ -389,6 +390,7 @@ int main( void ) " options: 1 (level-triggered, implies nbio=1),\n" \ " read_timeout=%%d default: 0 ms (no timeout)\n" \ " max_resend=%%d default: 0 (no resend on timeout)\n" \ + " skip_close_notify=%%d default: 0 (send close_notify)\n" \ "\n" \ USAGE_DTLS \ USAGE_CID \ @@ -517,6 +519,7 @@ struct options const char *cid_val_renego; /* the CID to use for incoming messages * after renegotiation */ int reproducible; /* make communication reproducible */ + int skip_close_notify; /* skip sending the close_notify alert */ } opt; int query_config( const char *config ); @@ -1311,6 +1314,7 @@ int main( int argc, char *argv[] ) opt.reproducible = DFL_REPRODUCIBLE; opt.nss_keylog = DFL_NSS_KEYLOG; opt.nss_keylog_file = DFL_NSS_KEYLOG_FILE; + opt.skip_close_notify = DFL_SKIP_CLOSE_NOTIFY; for( i = 1; i < argc; i++ ) { @@ -1723,6 +1727,12 @@ int main( int argc, char *argv[] ) { opt.nss_keylog_file = q; } + else if( strcmp( p, "skip_close_notify" ) == 0 ) + { + opt.skip_close_notify = atoi( q ); + if( opt.skip_close_notify < 0 || opt.skip_close_notify > 1 ) + goto usage; + } else goto usage; } @@ -3160,10 +3170,25 @@ close_notify: mbedtls_printf( " . Closing the connection..." ); fflush( stdout ); - /* No error checking, the connection might be closed already */ - do ret = mbedtls_ssl_close_notify( &ssl ); - while( ret == MBEDTLS_ERR_SSL_WANT_WRITE ); - ret = 0; + /* + * Most of the time sending a close_notify before closing is the right + * thing to do. However, when the server already knows how many messages + * are expected and closes the connection by itself, this alert becomes + * redundant. Sometimes with DTLS this redundancy becomes a problem by + * leading to a race condition where the server might close the connection + * before seeing the alert, and since UDP is connection-less when the + * alert arrives it will be seen as a new connection, which will fail as + * the alert is clearly not a valid ClientHello. This may cause spurious + * failures in tests that use DTLS and resumption with ssl_server2 in + * ssl-opt.sh, avoided by enabling skip_close_notify client-side. + */ + if( opt.skip_close_notify == 0 ) + { + /* No error checking, the connection might be closed already */ + do ret = mbedtls_ssl_close_notify( &ssl ); + while( ret == MBEDTLS_ERR_SSL_WANT_WRITE ); + ret = 0; + } mbedtls_printf( " done\n" ); diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index e65b8eca9..8585b25d2 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -2535,7 +2535,7 @@ run_test "Session resume using tickets: openssl client" \ run_test "Session resume using tickets, DTLS: basic" \ "$P_SRV debug_level=3 dtls=1 tickets=1" \ - "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1" \ + "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1 skip_close_notify=1" \ 0 \ -c "client hello, adding session ticket extension" \ -s "found session ticket extension" \ @@ -2549,7 +2549,7 @@ run_test "Session resume using tickets, DTLS: basic" \ run_test "Session resume using tickets, DTLS: cache disabled" \ "$P_SRV debug_level=3 dtls=1 tickets=1 cache_max=0" \ - "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1" \ + "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1 skip_close_notify=1" \ 0 \ -c "client hello, adding session ticket extension" \ -s "found session ticket extension" \ @@ -2563,7 +2563,7 @@ run_test "Session resume using tickets, DTLS: cache disabled" \ run_test "Session resume using tickets, DTLS: timeout" \ "$P_SRV debug_level=3 dtls=1 tickets=1 cache_max=0 ticket_timeout=1" \ - "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1 reco_delay=2" \ + "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1 skip_close_notify=1 reco_delay=2" \ 0 \ -c "client hello, adding session ticket extension" \ -s "found session ticket extension" \ @@ -2577,7 +2577,7 @@ run_test "Session resume using tickets, DTLS: timeout" \ run_test "Session resume using tickets, DTLS: session copy" \ "$P_SRV debug_level=3 dtls=1 tickets=1 cache_max=0" \ - "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1 reco_mode=0" \ + "$P_CLI debug_level=3 dtls=1 tickets=1 reconnect=1 skip_close_notify=1 reco_mode=0" \ 0 \ -c "client hello, adding session ticket extension" \ -s "found session ticket extension" \ @@ -2718,7 +2718,7 @@ run_test "Session resume using cache: openssl server" \ run_test "Session resume using cache, DTLS: tickets enabled on client" \ "$P_SRV dtls=1 debug_level=3 tickets=0" \ - "$P_CLI dtls=1 debug_level=3 tickets=1 reconnect=1" \ + "$P_CLI dtls=1 debug_level=3 tickets=1 reconnect=1 skip_close_notify=1" \ 0 \ -c "client hello, adding session ticket extension" \ -s "found session ticket extension" \ @@ -2732,7 +2732,7 @@ run_test "Session resume using cache, DTLS: tickets enabled on client" \ run_test "Session resume using cache, DTLS: tickets enabled on server" \ "$P_SRV dtls=1 debug_level=3 tickets=1" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1" \ 0 \ -C "client hello, adding session ticket extension" \ -S "found session ticket extension" \ @@ -2746,7 +2746,7 @@ run_test "Session resume using cache, DTLS: tickets enabled on server" \ run_test "Session resume using cache, DTLS: cache_max=0" \ "$P_SRV dtls=1 debug_level=3 tickets=0 cache_max=0" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1" \ 0 \ -S "session successfully restored from cache" \ -S "session successfully restored from ticket" \ @@ -2755,7 +2755,7 @@ run_test "Session resume using cache, DTLS: cache_max=0" \ run_test "Session resume using cache, DTLS: cache_max=1" \ "$P_SRV dtls=1 debug_level=3 tickets=0 cache_max=1" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1" \ 0 \ -s "session successfully restored from cache" \ -S "session successfully restored from ticket" \ @@ -2764,7 +2764,7 @@ run_test "Session resume using cache, DTLS: cache_max=1" \ run_test "Session resume using cache, DTLS: timeout > delay" \ "$P_SRV dtls=1 debug_level=3 tickets=0" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 reco_delay=0" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1 reco_delay=0" \ 0 \ -s "session successfully restored from cache" \ -S "session successfully restored from ticket" \ @@ -2773,7 +2773,7 @@ run_test "Session resume using cache, DTLS: timeout > delay" \ run_test "Session resume using cache, DTLS: timeout < delay" \ "$P_SRV dtls=1 debug_level=3 tickets=0 cache_timeout=1" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 reco_delay=2" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1 reco_delay=2" \ 0 \ -S "session successfully restored from cache" \ -S "session successfully restored from ticket" \ @@ -2782,7 +2782,7 @@ run_test "Session resume using cache, DTLS: timeout < delay" \ run_test "Session resume using cache, DTLS: no timeout" \ "$P_SRV dtls=1 debug_level=3 tickets=0 cache_timeout=0" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 reco_delay=2" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1 reco_delay=2" \ 0 \ -s "session successfully restored from cache" \ -S "session successfully restored from ticket" \ @@ -2791,7 +2791,7 @@ run_test "Session resume using cache, DTLS: no timeout" \ run_test "Session resume using cache, DTLS: session copy" \ "$P_SRV dtls=1 debug_level=3 tickets=0" \ - "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 reco_mode=0" \ + "$P_CLI dtls=1 debug_level=3 tickets=0 reconnect=1 skip_close_notify=1 reco_mode=0" \ 0 \ -s "session successfully restored from cache" \ -S "session successfully restored from ticket" \ @@ -4543,19 +4543,19 @@ run_test "Event-driven I/O, DTLS: ticket + client auth" \ run_test "Event-driven I/O, DTLS: ticket + client auth + resume" \ "$P_SRV dtls=1 event=1 tickets=1 auth_mode=required" \ - "$P_CLI dtls=1 event=1 tickets=1 reconnect=1" \ + "$P_CLI dtls=1 event=1 tickets=1 reconnect=1 skip_close_notify=1" \ 0 \ -c "Read from server: .* bytes read" run_test "Event-driven I/O, DTLS: ticket + resume" \ "$P_SRV dtls=1 event=1 tickets=1 auth_mode=none" \ - "$P_CLI dtls=1 event=1 tickets=1 reconnect=1" \ + "$P_CLI dtls=1 event=1 tickets=1 reconnect=1 skip_close_notify=1" \ 0 \ -c "Read from server: .* bytes read" run_test "Event-driven I/O, DTLS: session-id resume" \ "$P_SRV dtls=1 event=1 tickets=0 auth_mode=none" \ - "$P_CLI dtls=1 event=1 tickets=0 reconnect=1" \ + "$P_CLI dtls=1 event=1 tickets=0 reconnect=1 skip_close_notify=1" \ 0 \ -c "Read from server: .* bytes read" @@ -4567,7 +4567,7 @@ run_test "Event-driven I/O, DTLS: session-id resume" \ run_test "Event-driven I/O, DTLS: session-id resume, UDP packing" \ -p "$P_PXY pack=50" \ "$P_SRV dtls=1 event=1 tickets=0 auth_mode=required" \ - "$P_CLI dtls=1 event=1 tickets=0 reconnect=1" \ + "$P_CLI dtls=1 event=1 tickets=0 reconnect=1 skip_close_notify=1" \ 0 \ -c "Read from server: .* bytes read" @@ -7807,7 +7807,7 @@ run_test "DTLS fragmenting: proxy MTU, resumed handshake" \ key_file=data_files/server8.key \ hs_timeout=10000-60000 \ force_ciphersuite=TLS-ECDHE-ECDSA-WITH-AES-128-GCM-SHA256 \ - mtu=1450 reconnect=1 reco_delay=1" \ + mtu=1450 reconnect=1 skip_close_notify=1 reco_delay=1" \ 0 \ -S "autoreduction" \ -s "found fragmented DTLS handshake message" \ @@ -8656,7 +8656,7 @@ run_test "DTLS proxy: 3d, min handshake, resumption" \ "$P_SRV dtls=1 dgram_packing=0 hs_timeout=500-10000 tickets=0 auth_mode=none \ psk=abc123 debug_level=3" \ "$P_CLI dtls=1 dgram_packing=0 hs_timeout=500-10000 tickets=0 psk=abc123 \ - debug_level=3 reconnect=1 read_timeout=1000 max_resend=10 \ + debug_level=3 reconnect=1 skip_close_notify=1 read_timeout=1000 max_resend=10 \ force_ciphersuite=TLS-PSK-WITH-AES-128-CCM-8" \ 0 \ -s "a session has been resumed" \ @@ -8670,7 +8670,7 @@ run_test "DTLS proxy: 3d, min handshake, resumption, nbio" \ "$P_SRV dtls=1 dgram_packing=0 hs_timeout=500-10000 tickets=0 auth_mode=none \ psk=abc123 debug_level=3 nbio=2" \ "$P_CLI dtls=1 dgram_packing=0 hs_timeout=500-10000 tickets=0 psk=abc123 \ - debug_level=3 reconnect=1 read_timeout=1000 max_resend=10 \ + debug_level=3 reconnect=1 skip_close_notify=1 read_timeout=1000 max_resend=10 \ force_ciphersuite=TLS-PSK-WITH-AES-128-CCM-8 nbio=2" \ 0 \ -s "a session has been resumed" \