From 606671e06e91fd660c02db4878cbf8799551ed09 Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Fri, 17 Feb 2023 11:36:33 +0100 Subject: [PATCH 01/53] tls13: server: Check mbedtls_ssl_set_hs_psk returned value Signed-off-by: Ronald Cron --- library/ssl_tls13_server.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c index 81c289aee5..b91cde6375 100644 --- a/library/ssl_tls13_server.c +++ b/library/ssl_tls13_server.c @@ -258,6 +258,8 @@ static int ssl_tls13_offered_psks_check_identity_match( int *psk_type, mbedtls_ssl_session *session) { + int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; + ((void) session); ((void) obfuscated_ticket_age); *psk_type = MBEDTLS_SSL_TLS1_3_PSK_EXTERNAL; @@ -271,9 +273,13 @@ static int ssl_tls13_offered_psks_check_identity_match( session) == SSL_TLS1_3_OFFERED_PSK_MATCH) { ssl->handshake->resume = 1; *psk_type = MBEDTLS_SSL_TLS1_3_PSK_RESUMPTION; - mbedtls_ssl_set_hs_psk(ssl, - session->resumption_key, - session->resumption_key_len); + ret = mbedtls_ssl_set_hs_psk(ssl, + session->resumption_key, + session->resumption_key_len); + if (ret != 0) { + MBEDTLS_SSL_DEBUG_RET(1, "mbedtls_ssl_set_hs_psk", ret); + return ret; + } MBEDTLS_SSL_DEBUG_BUF(4, "Ticket-resumed PSK:", session->resumption_key, @@ -299,7 +305,11 @@ static int ssl_tls13_offered_psks_check_identity_match( identity_len == ssl->conf->psk_identity_len && mbedtls_ct_memcmp(ssl->conf->psk_identity, identity, identity_len) == 0) { - mbedtls_ssl_set_hs_psk(ssl, ssl->conf->psk, ssl->conf->psk_len); + ret = mbedtls_ssl_set_hs_psk(ssl, ssl->conf->psk, ssl->conf->psk_len); + if (ret != 0) { + MBEDTLS_SSL_DEBUG_RET(1, "mbedtls_ssl_set_hs_psk", ret); + return ret; + } return SSL_TLS1_3_OFFERED_PSK_MATCH; } From fc7ae87ad49308024dd76519efe2b54ff0904a90 Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Thu, 16 Feb 2023 15:32:19 +0100 Subject: [PATCH 02/53] tls13: server: Check ciphersuite list length parity once Check ciphersuite list length parity once, mainly to enable the possibility of getting out of the loop of the ciphersuites whenever we want. Signed-off-by: Ronald Cron --- library/ssl_tls13_server.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c index b91cde6375..f557d7f40e 100644 --- a/library/ssl_tls13_server.c +++ b/library/ssl_tls13_server.c @@ -1333,6 +1333,15 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl, cipher_suites_len = MBEDTLS_GET_UINT16_BE(p, 0); p += 2; + /* + * The length of the ciphersuite list has to be even. + */ + if (cipher_suites_len & 1) { + MBEDTLS_SSL_PEND_FATAL_ALERT(MBEDTLS_SSL_ALERT_MSG_DECODE_ERROR, + MBEDTLS_ERR_SSL_DECODE_ERROR); + return MBEDTLS_ERR_SSL_DECODE_ERROR; + } + /* Check we have enough data for the ciphersuite list, the legacy * compression methods and the length of the extensions. * @@ -1362,8 +1371,6 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl, uint16_t cipher_suite; const mbedtls_ssl_ciphersuite_t *ciphersuite_info; - MBEDTLS_SSL_CHK_BUF_READ_PTR(p, cipher_suites_end, 2); - cipher_suite = MBEDTLS_GET_UINT16_BE(p, 0); ciphersuite_info = ssl_tls13_validate_peer_ciphersuite( ssl, cipher_suite); From 25e9ec61f07cd1d6fa8f483e03dbde8787f4f235 Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Thu, 16 Feb 2023 15:35:16 +0100 Subject: [PATCH 03/53] tls13: server: Select preferred cipher suite Signed-off-by: Ronald Cron --- library/ssl_tls13_server.c | 2 ++ tests/ssl-opt.sh | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c index f557d7f40e..8aff191115 100644 --- a/library/ssl_tls13_server.c +++ b/library/ssl_tls13_server.c @@ -1383,6 +1383,7 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl, MBEDTLS_SSL_DEBUG_MSG(2, ("selected ciphersuite: %04x - %s", cipher_suite, ciphersuite_info->name)); + break; } if (handshake->ciphersuite_info == NULL) { @@ -1390,6 +1391,7 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl, MBEDTLS_ERR_SSL_HANDSHAKE_FAILURE); return MBEDTLS_ERR_SSL_HANDSHAKE_FAILURE; } + p = cipher_suites_end; /* ... * opaque legacy_compression_methods<1..2^8-1>; diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index c176d0d628..b15fe16f7d 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -13222,7 +13222,7 @@ requires_config_enabled MBEDTLS_DEBUG_C requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED -run_test "TLS 1.3: NewSessionTicket: Basic check, G->m" \ +run_test "TLS 1.3: NewSessionTicket: resumption failure, PSK len too big, G->m" \ "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \ 0 \ @@ -13231,9 +13231,9 @@ run_test "TLS 1.3: NewSessionTicket: Basic check, G->m" \ -s "=> write NewSessionTicket msg" \ -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET" \ -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET_FLUSH" \ + -s "mbedtls_ssl_set_hs_psk() returned" \ -s "key exchange mode: ephemeral" \ - -s "key exchange mode: psk_ephemeral" \ - -s "found pre_shared_key extension" + -S "key exchange mode: psk_ephemeral" requires_config_enabled MBEDTLS_SSL_SESSION_TICKETS requires_config_enabled MBEDTLS_SSL_SRV_C From 0a1c50415636405ab17a4b5a310d83d7fba0a64f Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Mon, 20 Feb 2023 10:44:22 +0100 Subject: [PATCH 04/53] tls13: Fix session resumption with 384 bits PSKs MBEDTLS_PSK_MAX_LEN main purpose is to determine a miximum size for the TLS 1.2 pre-master secret. This is not relevant to TLS 1.3 thus disable in TLS 1.3 case the check against MBEDTLS_PSK_MAX_LEN when setting during the handshake the PSK through mbedtls_ssl_set_hs_psk(). This fixes the session resumption with 384 bits PSKs when MBEDTLS_PSK_MAX_LEN is smaller than that. Signed-off-by: Ronald Cron --- library/ssl_tls.c | 5 ++++- tests/ssl-opt.sh | 28 +++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/library/ssl_tls.c b/library/ssl_tls.c index 86f5c0b555..4312f154af 100644 --- a/library/ssl_tls.c +++ b/library/ssl_tls.c @@ -2145,9 +2145,12 @@ int mbedtls_ssl_set_hs_psk(mbedtls_ssl_context *ssl, return MBEDTLS_ERR_SSL_BAD_INPUT_DATA; } - if (psk_len > MBEDTLS_PSK_MAX_LEN) { +#if defined(MBEDTLS_SSL_PROTO_TLS1_2) + if (ssl->tls_version == MBEDTLS_SSL_VERSION_TLS1_2 && + psk_len > MBEDTLS_PSK_MAX_LEN) { return MBEDTLS_ERR_SSL_BAD_INPUT_DATA; } +#endif /* MBEDTLS_SSL_PROTO_TLS1_2 */ ssl_remove_psk(ssl); diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index b15fe16f7d..3f8b203a70 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -13222,7 +13222,7 @@ requires_config_enabled MBEDTLS_DEBUG_C requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED -run_test "TLS 1.3: NewSessionTicket: resumption failure, PSK len too big, G->m" \ +run_test "TLS 1.3: NewSessionTicket: Basic check" \ "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \ 0 \ @@ -13231,9 +13231,31 @@ run_test "TLS 1.3: NewSessionTicket: resumption failure, PSK len too big, G-> -s "=> write NewSessionTicket msg" \ -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET" \ -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET_FLUSH" \ - -s "mbedtls_ssl_set_hs_psk() returned" \ -s "key exchange mode: ephemeral" \ - -S "key exchange mode: psk_ephemeral" + -s "key exchange mode: psk_ephemeral" \ + -s "found pre_shared_key extension" + +requires_gnutls_tls1_3 +requires_config_enabled MBEDTLS_SSL_SESSION_TICKETS +requires_config_enabled MBEDTLS_SSL_SRV_C +requires_config_enabled MBEDTLS_DEBUG_C +requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ + MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \ + MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED +requires_ciphersuite_enabled TLS1-3-AES-256-GCM-SHA384 +run_test "TLS 1.3: NewSessionTicket: Basic check with AES-256-GCM only, G->m" \ + "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ + "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3:-CIPHER-ALL:+AES-256-GCM -V -r" \ + 0 \ + -c "Connecting again- trying to resume previous session" \ + -c "NEW SESSION TICKET (4) was received" \ + -s "Ciphersuite is TLS1-3-AES-256-GCM-SHA384" \ + -s "=> write NewSessionTicket msg" \ + -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET" \ + -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET_FLUSH" \ + -s "key exchange mode: ephemeral" \ + -s "key exchange mode: psk_ephemeral" \ + -s "found pre_shared_key extension" requires_config_enabled MBEDTLS_SSL_SESSION_TICKETS requires_config_enabled MBEDTLS_SSL_SRV_C From b18c67af5fc7c47f6251c6ea0b64fcec12109ee7 Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Thu, 16 Feb 2023 16:57:16 +0100 Subject: [PATCH 05/53] tls13: ssl-opt.sh: Add test of default crypto algo Signed-off-by: Ronald Cron --- tests/ssl-opt.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index 3f8b203a70..68641385ac 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -11468,6 +11468,21 @@ run_test "TLS 1.3: Test gnutls tls1_3 feature" \ -c "Version: TLS1.3" # TLS1.3 test cases +requires_config_enabled MBEDTLS_SSL_PROTO_TLS1_3 +requires_config_enabled MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED +requires_ciphersuite_enabled TLS1-3-AES-128-GCM-SHA256 +requires_config_enabled MBEDTLS_ECP_DP_CURVE25519_ENABLED +requires_config_enabled MBEDTLS_ECP_DP_SECP256R1_ENABLED +requires_config_enabled MBEDTLS_ECDSA_C +run_test "TLS 1.3: Default" \ + "$P_SRV allow_sha1=0 debug_level=3 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13" \ + "$P_CLI allow_sha1=0" \ + 0 \ + -s "Protocol is TLSv1.3" \ + -s "Ciphersuite is TLS1-3-AES-128-GCM-SHA256" \ + -s "ECDH group: x25519" \ + -s "selected signature algorithm ecdsa_secp256r1_sha256" + requires_openssl_tls1_3 requires_config_enabled MBEDTLS_DEBUG_C requires_config_enabled MBEDTLS_SSL_CLI_C From 4bb67736400540f3f2afb38e2abfae6cfd02018a Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Thu, 16 Feb 2023 15:51:18 +0100 Subject: [PATCH 06/53] tls13: Apply same preference rules for ciphersuites as for TLS 1.2 Signed-off-by: Ronald Cron --- library/ssl_ciphersuites.c | 4 ++-- tests/ssl-opt.sh | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/library/ssl_ciphersuites.c b/library/ssl_ciphersuites.c index 33789c4633..501608a5a8 100644 --- a/library/ssl_ciphersuites.c +++ b/library/ssl_ciphersuites.c @@ -52,9 +52,9 @@ static const int ciphersuite_preference[] = #else #if defined(MBEDTLS_SSL_PROTO_TLS1_3) /* TLS 1.3 ciphersuites */ - MBEDTLS_TLS1_3_AES_128_GCM_SHA256, - MBEDTLS_TLS1_3_AES_256_GCM_SHA384, MBEDTLS_TLS1_3_CHACHA20_POLY1305_SHA256, + MBEDTLS_TLS1_3_AES_256_GCM_SHA384, + MBEDTLS_TLS1_3_AES_128_GCM_SHA256, MBEDTLS_TLS1_3_AES_128_CCM_SHA256, MBEDTLS_TLS1_3_AES_128_CCM_8_SHA256, #endif /* MBEDTLS_SSL_PROTO_TLS1_3 */ diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index 68641385ac..3b493ee391 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -11470,7 +11470,7 @@ run_test "TLS 1.3: Test gnutls tls1_3 feature" \ # TLS1.3 test cases requires_config_enabled MBEDTLS_SSL_PROTO_TLS1_3 requires_config_enabled MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED -requires_ciphersuite_enabled TLS1-3-AES-128-GCM-SHA256 +requires_ciphersuite_enabled TLS1-3-CHACHA20-POLY1305-SHA256 requires_config_enabled MBEDTLS_ECP_DP_CURVE25519_ENABLED requires_config_enabled MBEDTLS_ECP_DP_SECP256R1_ENABLED requires_config_enabled MBEDTLS_ECDSA_C @@ -11479,7 +11479,7 @@ run_test "TLS 1.3: Default" \ "$P_CLI allow_sha1=0" \ 0 \ -s "Protocol is TLSv1.3" \ - -s "Ciphersuite is TLS1-3-AES-128-GCM-SHA256" \ + -s "Ciphersuite is TLS1-3-CHACHA20-POLY1305-SHA256" \ -s "ECDH group: x25519" \ -s "selected signature algorithm ecdsa_secp256r1_sha256" @@ -11503,7 +11503,7 @@ run_test "TLS 1.3: minimal feature sets - openssl" \ -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \ -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \ -c "<= ssl_tls13_process_server_hello" \ - -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \ + -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \ -c "ECDH curve: x25519" \ -c "=> ssl_tls13_process_server_hello" \ -c "<= parse encrypted extensions" \ @@ -11537,7 +11537,7 @@ run_test "TLS 1.3: minimal feature sets - gnutls" \ -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \ -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \ -c "<= ssl_tls13_process_server_hello" \ - -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \ + -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \ -c "ECDH curve: x25519" \ -c "=> ssl_tls13_process_server_hello" \ -c "<= parse encrypted extensions" \ @@ -11570,7 +11570,7 @@ run_test "TLS 1.3: alpn - openssl" \ -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \ -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \ -c "<= ssl_tls13_process_server_hello" \ - -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \ + -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \ -c "ECDH curve: x25519" \ -c "=> ssl_tls13_process_server_hello" \ -c "<= parse encrypted extensions" \ @@ -11606,7 +11606,7 @@ run_test "TLS 1.3: alpn - gnutls" \ -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \ -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \ -c "<= ssl_tls13_process_server_hello" \ - -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \ + -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \ -c "ECDH curve: x25519" \ -c "=> ssl_tls13_process_server_hello" \ -c "<= parse encrypted extensions" \ From 675d97d42e16fa4e55d80d76596b36c56730589b Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Fri, 17 Feb 2023 15:49:03 +0100 Subject: [PATCH 07/53] Add change log Signed-off-by: Ronald Cron --- .../tls13-reorder-ciphersuite-preference-list.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt diff --git a/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt new file mode 100644 index 0000000000..948bc882a4 --- /dev/null +++ b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt @@ -0,0 +1,11 @@ +Default behavior changes + * The default priority order of TLS 1.3 cipher suites has been modified to + follow the same rules as the TLS 1.2 cipher suites (see + ssl_ciphersuites.c). + +Bugfix + * In the TLS 1.3 server, select the prefered client cipher suite, not the + least prefered. The selection error was introduced in Mbed TLS 3.3.0. + * Fix TLS 1.3 session resumption when the established pre-shared key is + 384 bits long. That is the length of pre-shared keys created under a + session where the cipher suite is TLS_AES_256_GCM_SHA384. From d89360b87bb5bcf09a21c74a799056c49260b260 Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Tue, 21 Feb 2023 08:53:33 +0100 Subject: [PATCH 08/53] Fix and improve documentation, comments and logs Signed-off-by: Ronald Cron --- ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt | 7 ++++--- library/ssl_tls13_server.c | 5 +++++ tests/ssl-opt.sh | 5 ++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt index 948bc882a4..1d3406854d 100644 --- a/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt +++ b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt @@ -1,11 +1,12 @@ Default behavior changes * The default priority order of TLS 1.3 cipher suites has been modified to follow the same rules as the TLS 1.2 cipher suites (see - ssl_ciphersuites.c). + ssl_ciphersuites.c). The preferred cipher suite is now + TLS_CHACHA20_POLY1305_SHA256. Bugfix - * In the TLS 1.3 server, select the prefered client cipher suite, not the - least prefered. The selection error was introduced in Mbed TLS 3.3.0. + * In the TLS 1.3 server, select the preferred client cipher suite, not the + least preferred. The selection error was introduced in Mbed TLS 3.3.0. * Fix TLS 1.3 session resumption when the established pre-shared key is 384 bits long. That is the length of pre-shared keys created under a session where the cipher suite is TLS_AES_256_GCM_SHA384. diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c index 8aff191115..005a1d7999 100644 --- a/library/ssl_tls13_server.c +++ b/library/ssl_tls13_server.c @@ -1371,6 +1371,11 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl, uint16_t cipher_suite; const mbedtls_ssl_ciphersuite_t *ciphersuite_info; + /* + * "cipher_suite_end - p is even" is an invariant of the loop. As + * cipher_suites_end - p > 0, we have cipher_suites_end - p >= 2 and + * it is thus safe to read two bytes. + */ cipher_suite = MBEDTLS_GET_UINT16_BE(p, 0); ciphersuite_info = ssl_tls13_validate_peer_ciphersuite( ssl, cipher_suite); diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index 3b493ee391..b1ee654938 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -13237,7 +13237,7 @@ requires_config_enabled MBEDTLS_DEBUG_C requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED -run_test "TLS 1.3: NewSessionTicket: Basic check" \ +run_test "TLS 1.3: NewSessionTicket: Basic check, G->m" \ "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \ 0 \ @@ -13257,6 +13257,9 @@ requires_config_enabled MBEDTLS_DEBUG_C requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED +# Test the session resumption when the cipher suite for the original session is +# TLS1-3-AES-256-GCM-SHA384. In that case, the PSK is 384 bits long and not +# 256 bits long as with all the other TLS 1.3 cipher suites. requires_ciphersuite_enabled TLS1-3-AES-256-GCM-SHA384 run_test "TLS 1.3: NewSessionTicket: Basic check with AES-256-GCM only, G->m" \ "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ From 1aa6e8d6e9f5af25c24a2484d774f9a879aa873a Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Thu, 23 Feb 2023 09:46:54 +0100 Subject: [PATCH 09/53] Restore same PSK length enforcement Restore same PSK length enforcement in conf_psk and set_hs_psk, whether the negotiated protocol is TLS 1.2 or TLS 1.3. Signed-off-by: Ronald Cron --- include/mbedtls/mbedtls_config.h | 2 +- include/mbedtls/ssl.h | 16 +++++++++++++++- library/ssl_tls.c | 5 +---- tests/ssl-opt.sh | 2 +- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h index 9ae51c964a..4c676c520d 100644 --- a/include/mbedtls/mbedtls_config.h +++ b/include/mbedtls/mbedtls_config.h @@ -3761,7 +3761,7 @@ */ //#define MBEDTLS_SSL_DTLS_MAX_BUFFERING 32768 -//#define MBEDTLS_PSK_MAX_LEN 32 /**< Max size of TLS pre-shared keys, in bytes (default 256 bits) */ +//#define MBEDTLS_PSK_MAX_LEN 32 /**< Max size of TLS pre-shared keys, in bytes (default 256 or 384 bits) */ //#define MBEDTLS_SSL_COOKIE_TIMEOUT 60 /**< Default expiration delay of DTLS cookies, in seconds if HAVE_TIME, or in number of cookies issued */ /** diff --git a/include/mbedtls/ssl.h b/include/mbedtls/ssl.h index 4b954bb458..0df142d685 100644 --- a/include/mbedtls/ssl.h +++ b/include/mbedtls/ssl.h @@ -599,8 +599,22 @@ * Size defines */ #if !defined(MBEDTLS_PSK_MAX_LEN) -#define MBEDTLS_PSK_MAX_LEN 32 /* 256 bits */ +/* + * If the library supports TLS 1.3 tickets and the cipher suite + * TLS1-3-AES-256-GCM-SHA384, set the PSK maximum length to 48 instead of 32. + * That way, the TLS 1.3 client and server are able to resume sessions where + * the cipher suite was TLS1-3-AES-256-GCM-SHA384 (pre-shared keys are 48 + * bytes long is that case). + */ +#if defined(MBEDTLS_SSL_PROTO_TLS1_3) && \ + defined(MBEDTLS_SSL_SESSION_TICKETS) && \ + defined(MBEDTLS_AES_C) && defined(MBEDTLS_GCM_C) && \ + defined(MBEDTLS_HAS_ALG_SHA_384_VIA_MD_OR_PSA_BASED_ON_USE_PSA) +#define MBEDTLS_PSK_MAX_LEN 48 /* 384 bits */ +#else +#define MBEDTLS_PSK_MAX_LEN 32 /* 256 bits */ #endif +#endif /* !MBEDTLS_PSK_MAX_LEN */ /* Dummy type used only for its size */ union mbedtls_ssl_premaster_secret { diff --git a/library/ssl_tls.c b/library/ssl_tls.c index 4312f154af..86f5c0b555 100644 --- a/library/ssl_tls.c +++ b/library/ssl_tls.c @@ -2145,12 +2145,9 @@ int mbedtls_ssl_set_hs_psk(mbedtls_ssl_context *ssl, return MBEDTLS_ERR_SSL_BAD_INPUT_DATA; } -#if defined(MBEDTLS_SSL_PROTO_TLS1_2) - if (ssl->tls_version == MBEDTLS_SSL_VERSION_TLS1_2 && - psk_len > MBEDTLS_PSK_MAX_LEN) { + if (psk_len > MBEDTLS_PSK_MAX_LEN) { return MBEDTLS_ERR_SSL_BAD_INPUT_DATA; } -#endif /* MBEDTLS_SSL_PROTO_TLS1_2 */ ssl_remove_psk(ssl); diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index b1ee654938..6794068731 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -13239,7 +13239,7 @@ requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED run_test "TLS 1.3: NewSessionTicket: Basic check, G->m" \ "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ - "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \ + "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3:-AES-256-GCM -V -r" \ 0 \ -c "Connecting again- trying to resume previous session" \ -c "NEW SESSION TICKET (4) was received" \ From ee54de02b196145295037142dee589476d47f94c Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Fri, 24 Feb 2023 12:06:30 +0100 Subject: [PATCH 10/53] Fix comments Signed-off-by: Ronald Cron --- include/mbedtls/ssl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mbedtls/ssl.h b/include/mbedtls/ssl.h index 0df142d685..7d9643ee48 100644 --- a/include/mbedtls/ssl.h +++ b/include/mbedtls/ssl.h @@ -603,8 +603,8 @@ * If the library supports TLS 1.3 tickets and the cipher suite * TLS1-3-AES-256-GCM-SHA384, set the PSK maximum length to 48 instead of 32. * That way, the TLS 1.3 client and server are able to resume sessions where - * the cipher suite was TLS1-3-AES-256-GCM-SHA384 (pre-shared keys are 48 - * bytes long is that case). + * the cipher suite is TLS1-3-AES-256-GCM-SHA384 (pre-shared keys are 48 + * bytes long in that case). */ #if defined(MBEDTLS_SSL_PROTO_TLS1_3) && \ defined(MBEDTLS_SSL_SESSION_TICKETS) && \ From 7dc413021006df09e99f2cf6804ef46139cddd36 Mon Sep 17 00:00:00 2001 From: Ronald Cron Date: Fri, 24 Feb 2023 12:10:09 +0100 Subject: [PATCH 11/53] Improve GnuTLS client priority for resumption basic check Signed-off-by: Ronald Cron --- tests/ssl-opt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index 6794068731..b1ee654938 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -13239,7 +13239,7 @@ requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \ MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED run_test "TLS 1.3: NewSessionTicket: Basic check, G->m" \ "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \ - "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3:-AES-256-GCM -V -r" \ + "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \ 0 \ -c "Connecting again- trying to resume previous session" \ -c "NEW SESSION TICKET (4) was received" \ From 18d417340f2ed2818b4b673b1040933ca8d97737 Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Fri, 24 Feb 2023 16:00:21 +0000 Subject: [PATCH 12/53] Add Threat Model Summary Signed-off-by: Janos Follath --- SECURITY.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/SECURITY.md b/SECURITY.md index 33bbc2ff30..ae37dab778 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -18,3 +18,63 @@ goes public. Only the maintained branches, as listed in [`BRANCHES.md`](BRANCHES.md), get security fixes. Users are urged to always use the latest version of a maintained branch. + +## Threat model + +We use the following classification of attacks: + +- **Remote Attacks:** The attacker can observe and modify data sent over the + network. This includes observing timing of individual packets and potentially + delaying legitimate messages. +- **Timing Attacks:** The attacker can gain information about the time certain + sets of instructions in Mbed TLS operations take. +- **Physical Attacks:** The attacker has access to physical information about + the hardware Mbed TLS is running on and/or can alter the physical state of + the hardware. + +### Remote attacks + +Mbed TLS aims to fully protect against remote attacks. Mbed Crypto aims to +enable the user application in providing full protection against remote +attacks. Said protection is limited to providing security guarantees offered by +the protocol in question. (For example Mbed TLS alone won't guarantee that the +messages will arrive without delay, as the TLS protocol doesn't guarantee that +either.) + +### Timing attacks + +Mbed TLS and Mbed Crypto provide limited protection against timing attacks. The +cost of protecting against timing attacks widely varies depending on the +granularity of the measurements and the noise present. Therefore the protection +in Mbed TLS and Mbed Crypto is limited. We are only aiming to provide protection +against publicly documented attacks. + +**Warning!** Block ciphers constitute an exception from this protection. For +details and workarounds see the section below. + +#### Block Ciphers + +Currently there are 4 block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES. +The Mbed TLS implementation uses lookup tables, which are vulnerable to timing +attacks. + +**Workarounds:** + +- Turn on hardware acceleration for AES. This is supported only on selected + architectures and currently only available for AES. See configuration options + `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details. +- Add a secure alternative implementation (typically bitslice implementation or + hardware acceleration) for the vulnerable cipher. See the [Alternative +Implementations Guide](docs/architecture/alternative-implementations.md) for + more information. +- Instead of a block cipher, use ChaCha20/Poly1305 for encryption and data + origin authentication. + +### Physical attacks + +Physical attacks are out of scope. Any attack using information about or +influencing the physical state of the hardware is considered physical, +independently of the attack vector. (For example Row Hammer and Screaming +Channels are considered physical attacks.) If physical attacks are present in a +use case or a user application's threat model, it needs to be mitigated by +physical countermeasures. From 24792d0a33b6283b8c84043d029698a3acb7251e Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Fri, 3 Mar 2023 14:16:12 +0000 Subject: [PATCH 13/53] Threat Model: Improve wording Signed-off-by: Janos Follath Co-authored-by: Dave Rodgman --- SECURITY.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index ae37dab778..50c8ffd980 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -26,8 +26,8 @@ We use the following classification of attacks: - **Remote Attacks:** The attacker can observe and modify data sent over the network. This includes observing timing of individual packets and potentially delaying legitimate messages. -- **Timing Attacks:** The attacker can gain information about the time certain - sets of instructions in Mbed TLS operations take. +- **Timing Attacks:** The attacker can gain information about the time taken + by certain sets of instructions in Mbed TLS operations. - **Physical Attacks:** The attacker has access to physical information about the hardware Mbed TLS is running on and/or can alter the physical state of the hardware. @@ -47,14 +47,14 @@ Mbed TLS and Mbed Crypto provide limited protection against timing attacks. The cost of protecting against timing attacks widely varies depending on the granularity of the measurements and the noise present. Therefore the protection in Mbed TLS and Mbed Crypto is limited. We are only aiming to provide protection -against publicly documented attacks. +against publicly documented attacks, and this protection is not currently complete. -**Warning!** Block ciphers constitute an exception from this protection. For +**Warning!** Block ciphers do not yet achieve full protection. For details and workarounds see the section below. #### Block Ciphers -Currently there are 4 block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES. +Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES. The Mbed TLS implementation uses lookup tables, which are vulnerable to timing attacks. @@ -63,7 +63,7 @@ attacks. - Turn on hardware acceleration for AES. This is supported only on selected architectures and currently only available for AES. See configuration options `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details. -- Add a secure alternative implementation (typically bitslice implementation or +- Add a secure alternative implementation (typically a bitsliced implementation or hardware acceleration) for the vulnerable cipher. See the [Alternative Implementations Guide](docs/architecture/alternative-implementations.md) for more information. From 144dd7d2fa1c2dc655064ef28de91f7042a36881 Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Fri, 3 Mar 2023 14:56:38 +0000 Subject: [PATCH 14/53] Threat Model: Miscellaneous clarifications Signed-off-by: Janos Follath --- SECURITY.md | 53 ++++++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 50c8ffd980..4ed9d3807c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -24,8 +24,8 @@ Users are urged to always use the latest version of a maintained branch. We use the following classification of attacks: - **Remote Attacks:** The attacker can observe and modify data sent over the - network. This includes observing timing of individual packets and potentially - delaying legitimate messages. + network. This includes observing the content and timing of individual packets, + as well as suppressing or delaying legitimate messages, and injecting messages. - **Timing Attacks:** The attacker can gain information about the time taken by certain sets of instructions in Mbed TLS operations. - **Physical Attacks:** The attacker has access to physical information about @@ -34,20 +34,19 @@ We use the following classification of attacks: ### Remote attacks -Mbed TLS aims to fully protect against remote attacks. Mbed Crypto aims to -enable the user application in providing full protection against remote -attacks. Said protection is limited to providing security guarantees offered by -the protocol in question. (For example Mbed TLS alone won't guarantee that the -messages will arrive without delay, as the TLS protocol doesn't guarantee that -either.) +Mbed TLS aims to fully protect against remote attacks and to enable the user +application in providing full protection against remote attacks. Said +protection is limited to providing security guarantees offered by the protocol +in question. (For example Mbed TLS alone won't guarantee that the messages will +arrive without delay, as the TLS protocol doesn't guarantee that either.) ### Timing attacks -Mbed TLS and Mbed Crypto provide limited protection against timing attacks. The -cost of protecting against timing attacks widely varies depending on the -granularity of the measurements and the noise present. Therefore the protection -in Mbed TLS and Mbed Crypto is limited. We are only aiming to provide protection -against publicly documented attacks, and this protection is not currently complete. +Mbed TLS provides limited protection against timing attacks. The cost of +protecting against timing attacks widely varies depending on the granularity of +the measurements and the noise present. Therefore the protection in Mbed TLS is +limited. We are only aiming to provide protection against publicly documented +attacks, and this protection is not currently complete. **Warning!** Block ciphers do not yet achieve full protection. For details and workarounds see the section below. @@ -55,26 +54,26 @@ details and workarounds see the section below. #### Block Ciphers Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES. -The Mbed TLS implementation uses lookup tables, which are vulnerable to timing -attacks. +The pure software implementation in Mbed TLS implementation uses lookup tables, +which are vulnerable to timing attacks. **Workarounds:** - Turn on hardware acceleration for AES. This is supported only on selected architectures and currently only available for AES. See configuration options `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details. -- Add a secure alternative implementation (typically a bitsliced implementation or - hardware acceleration) for the vulnerable cipher. See the [Alternative -Implementations Guide](docs/architecture/alternative-implementations.md) for - more information. -- Instead of a block cipher, use ChaCha20/Poly1305 for encryption and data - origin authentication. +- Add a secure alternative implementation (typically hardware acceleration) for + the vulnerable cipher. See the [Alternative Implementations +Guide](docs/architecture/alternative-implementations.md) for more information. +- Use cryptographic mechanisms that are not based on block ciphers. In + particular, for authenticated encryption, use ChaCha20/Poly1305 instead of + block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG. ### Physical attacks -Physical attacks are out of scope. Any attack using information about or -influencing the physical state of the hardware is considered physical, -independently of the attack vector. (For example Row Hammer and Screaming -Channels are considered physical attacks.) If physical attacks are present in a -use case or a user application's threat model, it needs to be mitigated by -physical countermeasures. +Physical attacks are out of scope (eg. power analysis or radio emissions). Any +attack using information about or influencing the physical state of the +hardware is considered physical, independently of the attack vector. (For +example Row Hammer and Screaming Channels are considered physical attacks.) If +physical attacks are present in a use case or a user application's threat +model, it needs to be mitigated by physical countermeasures. From 9ec195c984ba6978443086a5a7e924068210ee4d Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Mon, 6 Mar 2023 14:54:59 +0000 Subject: [PATCH 15/53] Threat Model: reorganise threat definitions Simplify organisation by placing threat definitions in their respective sections. Signed-off-by: Janos Follath --- SECURITY.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 4ed9d3807c..7981a44b64 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -23,17 +23,12 @@ Users are urged to always use the latest version of a maintained branch. We use the following classification of attacks: -- **Remote Attacks:** The attacker can observe and modify data sent over the - network. This includes observing the content and timing of individual packets, - as well as suppressing or delaying legitimate messages, and injecting messages. -- **Timing Attacks:** The attacker can gain information about the time taken - by certain sets of instructions in Mbed TLS operations. -- **Physical Attacks:** The attacker has access to physical information about - the hardware Mbed TLS is running on and/or can alter the physical state of - the hardware. - ### Remote attacks +The attacker can observe and modify data sent over the network. This includes +observing the content and timing of individual packets, as well as suppressing +or delaying legitimate messages, and injecting messages. + Mbed TLS aims to fully protect against remote attacks and to enable the user application in providing full protection against remote attacks. Said protection is limited to providing security guarantees offered by the protocol @@ -42,6 +37,9 @@ arrive without delay, as the TLS protocol doesn't guarantee that either.) ### Timing attacks +The attacker can gain information about the time taken by certain sets of +instructions in Mbed TLS operations. + Mbed TLS provides limited protection against timing attacks. The cost of protecting against timing attacks widely varies depending on the granularity of the measurements and the noise present. Therefore the protection in Mbed TLS is @@ -71,6 +69,9 @@ Guide](docs/architecture/alternative-implementations.md) for more information. ### Physical attacks +The attacker has access to physical information about the hardware Mbed TLS is +running on and/or can alter the physical state of the hardware. + Physical attacks are out of scope (eg. power analysis or radio emissions). Any attack using information about or influencing the physical state of the hardware is considered physical, independently of the attack vector. (For From fef82fd39b7d3d6e6e34ad336a331ee6dbbfd8bb Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 8 Mar 2023 16:10:39 +0000 Subject: [PATCH 16/53] Threat Model: increase classification detail Originally for the sake of simplicity there was a single category for software based attacks, namely timing side channel attacks. Be more precise and categorise attacks as software based whether or not they rely on physical information. Signed-off-by: Janos Follath --- SECURITY.md | 54 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 7981a44b64..c6345d65c8 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -35,22 +35,33 @@ protection is limited to providing security guarantees offered by the protocol in question. (For example Mbed TLS alone won't guarantee that the messages will arrive without delay, as the TLS protocol doesn't guarantee that either.) -### Timing attacks +### Local attacks + +The attacker is capable of running code on the same hardware as Mbed TLS, but +there is still a security boundary between them (ie. the attacker can't for +example read secrets from Mbed TLS' memory directly). + +#### Timing attacks The attacker can gain information about the time taken by certain sets of -instructions in Mbed TLS operations. +instructions in Mbed TLS operations. (See for example the [Flush+Reload +paper](https://eprint.iacr.org/2013/448.pdf).) + +(Technically, timing information can be observed over the network or through +physical side channels as well. Network timing attacks are less powerful than +local and countermeasures protecting against local attacks prevent network +attacks as well. If the timing information is gained through physical side +channels, we consider them physical attacks and as such they are out of scope.) Mbed TLS provides limited protection against timing attacks. The cost of protecting against timing attacks widely varies depending on the granularity of the measurements and the noise present. Therefore the protection in Mbed TLS is -limited. We are only aiming to provide protection against publicly documented -attacks, and this protection is not currently complete. +limited. We are only aiming to provide protection against **publicly +documented** attacks, and this protection is not currently complete. **Warning!** Block ciphers do not yet achieve full protection. For details and workarounds see the section below. -#### Block Ciphers - Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES. The pure software implementation in Mbed TLS implementation uses lookup tables, which are vulnerable to timing attacks. @@ -67,14 +78,35 @@ Guide](docs/architecture/alternative-implementations.md) for more information. particular, for authenticated encryption, use ChaCha20/Poly1305 instead of block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG. +#### Local non-timing side channels + +The attacker code running on the platform has access to some sensor capable of +picking up information on the physical state of the hardware while Mbed TLS is +running. This can for example be any analogue to digital converter on the +platform that is located unfortunately enough to pick up the CPU noise. (See +for example the [Leaky Noise +paper](https://tches.iacr.org/index.php/TCHES/article/view/8297).) + +Mbed TLS doesn't offer any security guarantees against local non-timing based +side channel attacks. If local non-timing attacks are present in a use case or +a user application's threat model, it needs to be mitigated by the platform. + +#### Local fault injection attacks + +Software running on the same hardware can affect the physical state of the +device and introduce faults. (See for example the [Row Hammer +paper](https://users.ece.cmu.edu/~yoonguk/papers/kim-isca14.pdf).) + +Mbed TLS doesn't offer any security guarantees against local fault injection +attacks. If local fault injection attacks are present in a use case or a user +application's threat model, it needs to be mitigated by the platform. + ### Physical attacks The attacker has access to physical information about the hardware Mbed TLS is -running on and/or can alter the physical state of the hardware. +running on and/or can alter the physical state of the hardware (eg. power +analysis, radio emissions or fault injection). -Physical attacks are out of scope (eg. power analysis or radio emissions). Any -attack using information about or influencing the physical state of the -hardware is considered physical, independently of the attack vector. (For -example Row Hammer and Screaming Channels are considered physical attacks.) If +Mbed TLS doesn't offer any security guarantees against physical attacks. If physical attacks are present in a use case or a user application's threat model, it needs to be mitigated by physical countermeasures. From ecaa293d32008a57eaf0b4b8af5eac769e86768d Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 8 Mar 2023 16:38:07 +0000 Subject: [PATCH 17/53] Threat model: explain dangling countermeasures Signed-off-by: Janos Follath --- SECURITY.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/SECURITY.md b/SECURITY.md index c6345d65c8..95e549f44e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -110,3 +110,16 @@ analysis, radio emissions or fault injection). Mbed TLS doesn't offer any security guarantees against physical attacks. If physical attacks are present in a use case or a user application's threat model, it needs to be mitigated by physical countermeasures. + +### Caveats + +#### Out of scope countermeasures + +Mbed TLS has evolved organically and a well defined threat model hasn't always +been present. Therefore, Mbed TLS might have countermeasures against attacks +outside the above defined threat model. + +The presence of such countermeasures don't mean that Mbed TLS provides +protection against a class of attacks outside of the above described threat +model. Neither does it mean that the failure of such a countermeasure is +considered a vulnerability. From 3d377605f3afa0499af1720a39c951362f1b573d Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 8 Mar 2023 16:53:50 +0000 Subject: [PATCH 18/53] Threat Model: move the block cipher section The block cipher exception affects both remote and local timing attacks. Move them to the Caveats section and reference it from both the local and the remote attack section. Signed-off-by: Janos Follath --- SECURITY.md | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 95e549f44e..677e68555d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -35,6 +35,11 @@ protection is limited to providing security guarantees offered by the protocol in question. (For example Mbed TLS alone won't guarantee that the messages will arrive without delay, as the TLS protocol doesn't guarantee that either.) +**Warning!** Depending on network latency, the timing of messages might be +enough to launch some timing attacks. Block ciphers do not yet achieve full +protection against these. For details and workarounds see the [Block +Ciphers](#block-ciphers) section. + ### Local attacks The attacker is capable of running code on the same hardware as Mbed TLS, but @@ -60,23 +65,7 @@ limited. We are only aiming to provide protection against **publicly documented** attacks, and this protection is not currently complete. **Warning!** Block ciphers do not yet achieve full protection. For -details and workarounds see the section below. - -Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES. -The pure software implementation in Mbed TLS implementation uses lookup tables, -which are vulnerable to timing attacks. - -**Workarounds:** - -- Turn on hardware acceleration for AES. This is supported only on selected - architectures and currently only available for AES. See configuration options - `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details. -- Add a secure alternative implementation (typically hardware acceleration) for - the vulnerable cipher. See the [Alternative Implementations -Guide](docs/architecture/alternative-implementations.md) for more information. -- Use cryptographic mechanisms that are not based on block ciphers. In - particular, for authenticated encryption, use ChaCha20/Poly1305 instead of - block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG. +details and workarounds see the [Block Ciphers](#block-ciphers) section. #### Local non-timing side channels @@ -123,3 +112,24 @@ The presence of such countermeasures don't mean that Mbed TLS provides protection against a class of attacks outside of the above described threat model. Neither does it mean that the failure of such a countermeasure is considered a vulnerability. + +#### Block ciphers + +Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and +DES. The pure software implementation in Mbed TLS implementation uses lookup +tables, which are vulnerable to timing attacks. + +These timing attacks can be physical, local or depending on network latency +even a remote. The attacks can result in key recovery. + +**Workarounds:** + +- Turn on hardware acceleration for AES. This is supported only on selected + architectures and currently only available for AES. See configuration options + `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details. +- Add a secure alternative implementation (typically hardware acceleration) for + the vulnerable cipher. See the [Alternative Implementations +Guide](docs/architecture/alternative-implementations.md) for more information. +- Use cryptographic mechanisms that are not based on block ciphers. In + particular, for authenticated encryption, use ChaCha20/Poly1305 instead of + block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG. From d5a09400ae23c949bfcc935dcd317eefe134d163 Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 8 Mar 2023 19:58:29 +0000 Subject: [PATCH 19/53] Threat Model: improve wording Signed-off-by: Janos Follath --- SECURITY.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 677e68555d..d0281ace93 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -42,14 +42,14 @@ Ciphers](#block-ciphers) section. ### Local attacks -The attacker is capable of running code on the same hardware as Mbed TLS, but -there is still a security boundary between them (ie. the attacker can't for -example read secrets from Mbed TLS' memory directly). +The attacker can run software on the same machine. The attacker has +insufficient privileges to directly access Mbed TLS assets such as memory and +files. #### Timing attacks -The attacker can gain information about the time taken by certain sets of -instructions in Mbed TLS operations. (See for example the [Flush+Reload +The attacker is able to observe the timing of instructions executed by Mbed +TLS.(See for example the [Flush+Reload paper](https://eprint.iacr.org/2013/448.pdf).) (Technically, timing information can be observed over the network or through From 042e433edad41e8dbba3d4833bfe2e9b05ef828d Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 8 Mar 2023 20:07:59 +0000 Subject: [PATCH 20/53] Threat Model: clarify attack vectors Timing attacks can be launched by any of the main 3 attackers. Clarify exactly how these are covered. Signed-off-by: Janos Follath --- SECURITY.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index d0281ace93..387221e61f 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -52,17 +52,16 @@ The attacker is able to observe the timing of instructions executed by Mbed TLS.(See for example the [Flush+Reload paper](https://eprint.iacr.org/2013/448.pdf).) -(Technically, timing information can be observed over the network or through -physical side channels as well. Network timing attacks are less powerful than -local and countermeasures protecting against local attacks prevent network -attacks as well. If the timing information is gained through physical side -channels, we consider them physical attacks and as such they are out of scope.) - Mbed TLS provides limited protection against timing attacks. The cost of protecting against timing attacks widely varies depending on the granularity of the measurements and the noise present. Therefore the protection in Mbed TLS is limited. We are only aiming to provide protection against **publicly -documented** attacks, and this protection is not currently complete. +documented** attacks. + +**Remark:** Timing information can be observed over the network or through +physical side channels as well. Remote and physical timing attacks are covered +in the [Remote attacks](remote-attacks) and [Physical +attacks](physical-attacks) sections respectively. **Warning!** Block ciphers do not yet achieve full protection. For details and workarounds see the [Block Ciphers](#block-ciphers) section. From d4f31c87d1cc93d8c4b830e413524cb458b89d9d Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 10 Mar 2023 22:21:47 +0100 Subject: [PATCH 21/53] Update bibliographic references There are new versions of the Intel whitepapers and they've moved. Signed-off-by: Gilles Peskine --- library/aesni.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/library/aesni.c b/library/aesni.c index f6b304d157..2d5ed9e0ca 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -18,8 +18,8 @@ */ /* - * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set - * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/ + * [AES-WP] https://www.intel.com/content/www/us/en/developer/articles/tool/intel-advanced-encryption-standard-aes-instructions-set.html + * [CLMUL-WP] https://www.intel.com/content/www/us/en/develop/download/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode.html */ #include "common.h" @@ -152,7 +152,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], /* * Caryless multiplication xmm2:xmm1 = xmm0 * xmm1 - * using [CLMUL-WP] algorithm 1 (p. 13). + * using [CLMUL-WP] algorithm 1 (p. 12). */ "movdqa %%xmm1, %%xmm2 \n\t" // copy of b1:b0 "movdqa %%xmm1, %%xmm3 \n\t" // same @@ -170,7 +170,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], /* * Now shift the result one bit to the left, - * taking advantage of [CLMUL-WP] eq 27 (p. 20) + * taking advantage of [CLMUL-WP] eq 27 (p. 18) */ "movdqa %%xmm1, %%xmm3 \n\t" // r1:r0 "movdqa %%xmm2, %%xmm4 \n\t" // r3:r2 @@ -188,7 +188,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], /* * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1 - * using [CLMUL-WP] algorithm 5 (p. 20). + * using [CLMUL-WP] algorithm 5 (p. 18). * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted). */ /* Step 2 (1) */ From d0f9b0bacc182de20dfcd86983ce7747a4215bb5 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 10 Mar 2023 22:25:13 +0100 Subject: [PATCH 22/53] Don't warn about Msan/Valgrind if AESNI isn't actually built The warning is only correct if the assembly code for AESNI is built, not if MBEDTLS_AESNI_C is activated but MBEDTLS_HAVE_ASM is disabled or the target architecture isn't x86_64. This is a partial fix for #7236. Signed-off-by: Gilles Peskine --- library/aesni.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/library/aesni.c b/library/aesni.c index 2d5ed9e0ca..786dbd92fc 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -26,13 +26,6 @@ #if defined(MBEDTLS_AESNI_C) -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#warning \ - "MBEDTLS_AESNI_C is known to cause spurious error reports with some memory sanitizers as they do not understand the assembly code." -#endif -#endif - #include "aesni.h" #include @@ -59,6 +52,13 @@ int mbedtls_aesni_has_support(unsigned int what) return (c & what) != 0; } +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#warning \ + "MBEDTLS_AESNI_C is known to cause spurious error reports with some memory sanitizers as they do not understand the assembly code." +#endif +#endif + /* * Binutils needs to be at least 2.19 to support AES-NI instructions. * Unfortunately, a lot of users have a lower version now (2014-04). From c51a413c473819cc83e73aeddecbafecddf528ca Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Tue, 14 Mar 2023 12:47:27 +0000 Subject: [PATCH 23/53] Threat Model: improve wording and grammar Signed-off-by: Janos Follath --- SECURITY.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 387221e61f..dcffa1d9be 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -21,7 +21,7 @@ Users are urged to always use the latest version of a maintained branch. ## Threat model -We use the following classification of attacks: +We classify attacks based on the capabilities of the attacker. ### Remote attacks @@ -32,13 +32,13 @@ or delaying legitimate messages, and injecting messages. Mbed TLS aims to fully protect against remote attacks and to enable the user application in providing full protection against remote attacks. Said protection is limited to providing security guarantees offered by the protocol -in question. (For example Mbed TLS alone won't guarantee that the messages will -arrive without delay, as the TLS protocol doesn't guarantee that either.) +being implemented. (For example Mbed TLS alone won't guarantee that the +messages will arrive without delay, as the TLS protocol doesn't guarantee that +either.) -**Warning!** Depending on network latency, the timing of messages might be -enough to launch some timing attacks. Block ciphers do not yet achieve full -protection against these. For details and workarounds see the [Block -Ciphers](#block-ciphers) section. +**Warning!** Block ciphers do not yet achieve full protection against attackers +who can measure the timing of packets with sufficient precision. For details +and workarounds see the [Block Ciphers](#block-ciphers) section. ### Local attacks @@ -70,14 +70,14 @@ details and workarounds see the [Block Ciphers](#block-ciphers) section. The attacker code running on the platform has access to some sensor capable of picking up information on the physical state of the hardware while Mbed TLS is -running. This can for example be any analogue to digital converter on the +running. This could for example be an analogue-to-digital converter on the platform that is located unfortunately enough to pick up the CPU noise. (See for example the [Leaky Noise paper](https://tches.iacr.org/index.php/TCHES/article/view/8297).) -Mbed TLS doesn't offer any security guarantees against local non-timing based +Mbed TLS doesn't make any security guarantees against local non-timing-based side channel attacks. If local non-timing attacks are present in a use case or -a user application's threat model, it needs to be mitigated by the platform. +a user application's threat model, they need to be mitigated by the platform. #### Local fault injection attacks @@ -85,23 +85,23 @@ Software running on the same hardware can affect the physical state of the device and introduce faults. (See for example the [Row Hammer paper](https://users.ece.cmu.edu/~yoonguk/papers/kim-isca14.pdf).) -Mbed TLS doesn't offer any security guarantees against local fault injection +Mbed TLS doesn't make any security guarantees against local fault injection attacks. If local fault injection attacks are present in a use case or a user -application's threat model, it needs to be mitigated by the platform. +application's threat model, they need to be mitigated by the platform. ### Physical attacks The attacker has access to physical information about the hardware Mbed TLS is -running on and/or can alter the physical state of the hardware (eg. power +running on and/or can alter the physical state of the hardware (e.g. power analysis, radio emissions or fault injection). -Mbed TLS doesn't offer any security guarantees against physical attacks. If +Mbed TLS doesn't make any security guarantees against physical attacks. If physical attacks are present in a use case or a user application's threat -model, it needs to be mitigated by physical countermeasures. +model, they need to be mitigated by physical countermeasures. ### Caveats -#### Out of scope countermeasures +#### Out-of-scope countermeasures Mbed TLS has evolved organically and a well defined threat model hasn't always been present. Therefore, Mbed TLS might have countermeasures against attacks From 4317a9ef1f2e4f5d5c52c2952df1d2c0423b9c6b Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Tue, 14 Mar 2023 14:49:34 +0000 Subject: [PATCH 24/53] Threat Model: clarify stance on timing attacks Signed-off-by: Janos Follath --- SECURITY.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index dcffa1d9be..97fe0e7475 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -48,15 +48,20 @@ files. #### Timing attacks -The attacker is able to observe the timing of instructions executed by Mbed -TLS.(See for example the [Flush+Reload -paper](https://eprint.iacr.org/2013/448.pdf).) +The attacker is able to observe the timing of instructions executed by Mbed TLS +by leveraging shared hardware that both Mbed TLS and the attacker have access +to. Typical attack vectors include cache timings, memory bus contention and +branch prediction. Mbed TLS provides limited protection against timing attacks. The cost of protecting against timing attacks widely varies depending on the granularity of the measurements and the noise present. Therefore the protection in Mbed TLS is limited. We are only aiming to provide protection against **publicly -documented** attacks. +documented attack techniques**. + +As attacks keep improving, so does Mbed TLS's protection. Mbed TLS is moving +towards a model of fully timing-invariant code, but has not reached this point +yet. **Remark:** Timing information can be observed over the network or through physical side channels as well. Remote and physical timing attacks are covered From ba75955cd80d57314ad6ad16922dec35515cab74 Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Tue, 14 Mar 2023 14:54:44 +0000 Subject: [PATCH 25/53] Threat Model: remove references Remove references to scientific papers as they are too specific and might be misleading. Signed-off-by: Janos Follath --- SECURITY.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 97fe0e7475..8d2337111c 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -76,9 +76,7 @@ details and workarounds see the [Block Ciphers](#block-ciphers) section. The attacker code running on the platform has access to some sensor capable of picking up information on the physical state of the hardware while Mbed TLS is running. This could for example be an analogue-to-digital converter on the -platform that is located unfortunately enough to pick up the CPU noise. (See -for example the [Leaky Noise -paper](https://tches.iacr.org/index.php/TCHES/article/view/8297).) +platform that is located unfortunately enough to pick up the CPU noise. Mbed TLS doesn't make any security guarantees against local non-timing-based side channel attacks. If local non-timing attacks are present in a use case or @@ -87,8 +85,7 @@ a user application's threat model, they need to be mitigated by the platform. #### Local fault injection attacks Software running on the same hardware can affect the physical state of the -device and introduce faults. (See for example the [Row Hammer -paper](https://users.ece.cmu.edu/~yoonguk/papers/kim-isca14.pdf).) +device and introduce faults. Mbed TLS doesn't make any security guarantees against local fault injection attacks. If local fault injection attacks are present in a use case or a user From 9118bf5791306398f4a8d5be79b303de62b3d0f6 Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Tue, 14 Mar 2023 15:43:24 +0000 Subject: [PATCH 26/53] Threat Model: adjust modality Signed-off-by: Janos Follath --- SECURITY.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 8d2337111c..8d3678a5ee 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -25,9 +25,10 @@ We classify attacks based on the capabilities of the attacker. ### Remote attacks -The attacker can observe and modify data sent over the network. This includes -observing the content and timing of individual packets, as well as suppressing -or delaying legitimate messages, and injecting messages. +In this section, we consider an attacker who can observe and modify data sent +over the network. This includes observing the content and timing of individual +packets, as well as suppressing or delaying legitimate messages, and injecting +messages. Mbed TLS aims to fully protect against remote attacks and to enable the user application in providing full protection against remote attacks. Said @@ -42,9 +43,9 @@ and workarounds see the [Block Ciphers](#block-ciphers) section. ### Local attacks -The attacker can run software on the same machine. The attacker has -insufficient privileges to directly access Mbed TLS assets such as memory and -files. +In this section, we consider an attacker who can run software on the same +machine. The attacker has insufficient privileges to directly access Mbed TLS +assets such as memory and files. #### Timing attacks @@ -93,9 +94,10 @@ application's threat model, they need to be mitigated by the platform. ### Physical attacks -The attacker has access to physical information about the hardware Mbed TLS is -running on and/or can alter the physical state of the hardware (e.g. power -analysis, radio emissions or fault injection). +In this section, we consider an attacker who can attacker has access to +physical information about the hardware Mbed TLS is running on and/or can alter +the physical state of the hardware (e.g. power analysis, radio emissions or +fault injection). Mbed TLS doesn't make any security guarantees against physical attacks. If physical attacks are present in a use case or a user application's threat From 0086f8626a7fbd82625d6696b621d1e2ad56edad Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Wed, 15 Mar 2023 13:31:48 +0000 Subject: [PATCH 27/53] Add changelog entry PR7083 silently fixed a security vulnerability in public, this commit adds a changelog entry for it. Signed-off-by: Janos Follath --- ChangeLog.d/fix-overread-in-tls13-debug.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 ChangeLog.d/fix-overread-in-tls13-debug.txt diff --git a/ChangeLog.d/fix-overread-in-tls13-debug.txt b/ChangeLog.d/fix-overread-in-tls13-debug.txt new file mode 100644 index 0000000000..e089ce161e --- /dev/null +++ b/ChangeLog.d/fix-overread-in-tls13-debug.txt @@ -0,0 +1,3 @@ +Security + * Fix a potential heap buffer overread in TLS 1.3 client-side when + MBEDTLS_DEBUG_C is enabled. This may result in an application crash. From 4e201448824a2cd951598733513cdb5ce285c1fe Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 15 Mar 2023 19:36:03 +0100 Subject: [PATCH 28/53] Improve the presentation of assembly blocks Uncrustify indents ``` asm("foo" HELLO "bar" "wibble"); ``` but we would like ``` asm("foo" HELLO "bar" "wibble"); ``` Make "bar" an argument of the macro HELLO, which makes the indentation from uncrustify match the semantics (everything should be aligned to the same column). Signed-off-by: Gilles Peskine --- library/aesni.c | 236 ++++++++++++++++++++++++------------------------ 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/library/aesni.c b/library/aesni.c index 786dbd92fc..77243ea387 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -69,13 +69,13 @@ int mbedtls_aesni_has_support(unsigned int what) * Operand macros are in gas order (src, dst) as opposed to Intel order * (dst, src) in order to blend better into the surrounding assembly code. */ -#define AESDEC ".byte 0x66,0x0F,0x38,0xDE," -#define AESDECLAST ".byte 0x66,0x0F,0x38,0xDF," -#define AESENC ".byte 0x66,0x0F,0x38,0xDC," -#define AESENCLAST ".byte 0x66,0x0F,0x38,0xDD," -#define AESIMC ".byte 0x66,0x0F,0x38,0xDB," -#define AESKEYGENA ".byte 0x66,0x0F,0x3A,0xDF," -#define PCLMULQDQ ".byte 0x66,0x0F,0x3A,0x44," +#define AESDEC(regs) ".byte 0x66,0x0F,0x38,0xDE," regs "\n\t" +#define AESDECLAST(regs) ".byte 0x66,0x0F,0x38,0xDF," regs "\n\t" +#define AESENC(regs) ".byte 0x66,0x0F,0x38,0xDC," regs "\n\t" +#define AESENCLAST(regs) ".byte 0x66,0x0F,0x38,0xDD," regs "\n\t" +#define AESIMC(regs) ".byte 0x66,0x0F,0x38,0xDB," regs "\n\t" +#define AESKEYGENA(regs, imm) ".byte 0x66,0x0F,0x3A,0xDF," regs "," imm "\n\t" +#define PCLMULQDQ(regs, imm) ".byte 0x66,0x0F,0x3A,0x44," regs "," imm "\n\t" #define xmm0_xmm0 "0xC0" #define xmm0_xmm1 "0xC8" @@ -103,25 +103,25 @@ int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx, "1: \n\t" // encryption loop "movdqu (%1), %%xmm1 \n\t" // load round key - AESENC xmm1_xmm0 "\n\t" // do round - "add $16, %1 \n\t" // point to next round key - "subl $1, %0 \n\t" // loop - "jnz 1b \n\t" - "movdqu (%1), %%xmm1 \n\t" // load round key - AESENCLAST xmm1_xmm0 "\n\t" // last round - "jmp 3f \n\t" + AESENC(xmm1_xmm0) // do round + "add $16, %1 \n\t" // point to next round key + "subl $1, %0 \n\t" // loop + "jnz 1b \n\t" + "movdqu (%1), %%xmm1 \n\t" // load round key + AESENCLAST(xmm1_xmm0) // last round + "jmp 3f \n\t" - "2: \n\t" // decryption loop - "movdqu (%1), %%xmm1 \n\t" - AESDEC xmm1_xmm0 "\n\t" // do round - "add $16, %1 \n\t" - "subl $1, %0 \n\t" - "jnz 2b \n\t" - "movdqu (%1), %%xmm1 \n\t" // load round key - AESDECLAST xmm1_xmm0 "\n\t" // last round + "2: \n\t" // decryption loop + "movdqu (%1), %%xmm1 \n\t" + AESDEC(xmm1_xmm0) // do round + "add $16, %1 \n\t" + "subl $1, %0 \n\t" + "jnz 2b \n\t" + "movdqu (%1), %%xmm1 \n\t" // load round key + AESDECLAST(xmm1_xmm0) // last round - "3: \n\t" - "movdqu %%xmm0, (%4) \n\t" // export output + "3: \n\t" + "movdqu %%xmm0, (%4) \n\t" // export output : : "r" (ctx->nr), "r" (ctx->buf + ctx->rk_offset), "r" (mode), "r" (input), "r" (output) : "memory", "cc", "xmm0", "xmm1"); @@ -157,34 +157,34 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], "movdqa %%xmm1, %%xmm2 \n\t" // copy of b1:b0 "movdqa %%xmm1, %%xmm3 \n\t" // same "movdqa %%xmm1, %%xmm4 \n\t" // same - PCLMULQDQ xmm0_xmm1 ",0x00 \n\t" // a0*b0 = c1:c0 - PCLMULQDQ xmm0_xmm2 ",0x11 \n\t" // a1*b1 = d1:d0 - PCLMULQDQ xmm0_xmm3 ",0x10 \n\t" // a0*b1 = e1:e0 - PCLMULQDQ xmm0_xmm4 ",0x01 \n\t" // a1*b0 = f1:f0 - "pxor %%xmm3, %%xmm4 \n\t" // e1+f1:e0+f0 - "movdqa %%xmm4, %%xmm3 \n\t" // same - "psrldq $8, %%xmm4 \n\t" // 0:e1+f1 - "pslldq $8, %%xmm3 \n\t" // e0+f0:0 - "pxor %%xmm4, %%xmm2 \n\t" // d1:d0+e1+f1 - "pxor %%xmm3, %%xmm1 \n\t" // c1+e0+f1:c0 + PCLMULQDQ(xmm0_xmm1, "0x00") // a0*b0 = c1:c0 + PCLMULQDQ(xmm0_xmm2, "0x11") // a1*b1 = d1:d0 + PCLMULQDQ(xmm0_xmm3, "0x10") // a0*b1 = e1:e0 + PCLMULQDQ(xmm0_xmm4, "0x01") // a1*b0 = f1:f0 + "pxor %%xmm3, %%xmm4 \n\t" // e1+f1:e0+f0 + "movdqa %%xmm4, %%xmm3 \n\t" // same + "psrldq $8, %%xmm4 \n\t" // 0:e1+f1 + "pslldq $8, %%xmm3 \n\t" // e0+f0:0 + "pxor %%xmm4, %%xmm2 \n\t" // d1:d0+e1+f1 + "pxor %%xmm3, %%xmm1 \n\t" // c1+e0+f1:c0 /* * Now shift the result one bit to the left, * taking advantage of [CLMUL-WP] eq 27 (p. 18) */ - "movdqa %%xmm1, %%xmm3 \n\t" // r1:r0 - "movdqa %%xmm2, %%xmm4 \n\t" // r3:r2 - "psllq $1, %%xmm1 \n\t" // r1<<1:r0<<1 - "psllq $1, %%xmm2 \n\t" // r3<<1:r2<<1 - "psrlq $63, %%xmm3 \n\t" // r1>>63:r0>>63 - "psrlq $63, %%xmm4 \n\t" // r3>>63:r2>>63 - "movdqa %%xmm3, %%xmm5 \n\t" // r1>>63:r0>>63 - "pslldq $8, %%xmm3 \n\t" // r0>>63:0 - "pslldq $8, %%xmm4 \n\t" // r2>>63:0 - "psrldq $8, %%xmm5 \n\t" // 0:r1>>63 - "por %%xmm3, %%xmm1 \n\t" // r1<<1|r0>>63:r0<<1 - "por %%xmm4, %%xmm2 \n\t" // r3<<1|r2>>62:r2<<1 - "por %%xmm5, %%xmm2 \n\t" // r3<<1|r2>>62:r2<<1|r1>>63 + "movdqa %%xmm1, %%xmm3 \n\t" // r1:r0 + "movdqa %%xmm2, %%xmm4 \n\t" // r3:r2 + "psllq $1, %%xmm1 \n\t" // r1<<1:r0<<1 + "psllq $1, %%xmm2 \n\t" // r3<<1:r2<<1 + "psrlq $63, %%xmm3 \n\t" // r1>>63:r0>>63 + "psrlq $63, %%xmm4 \n\t" // r3>>63:r2>>63 + "movdqa %%xmm3, %%xmm5 \n\t" // r1>>63:r0>>63 + "pslldq $8, %%xmm3 \n\t" // r0>>63:0 + "pslldq $8, %%xmm4 \n\t" // r2>>63:0 + "psrldq $8, %%xmm5 \n\t" // 0:r1>>63 + "por %%xmm3, %%xmm1 \n\t" // r1<<1|r0>>63:r0<<1 + "por %%xmm4, %%xmm2 \n\t" // r3<<1|r2>>62:r2<<1 + "por %%xmm5, %%xmm2 \n\t" // r3<<1|r2>>62:r2<<1|r1>>63 /* * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1 @@ -192,44 +192,44 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted). */ /* Step 2 (1) */ - "movdqa %%xmm1, %%xmm3 \n\t" // x1:x0 - "movdqa %%xmm1, %%xmm4 \n\t" // same - "movdqa %%xmm1, %%xmm5 \n\t" // same - "psllq $63, %%xmm3 \n\t" // x1<<63:x0<<63 = stuff:a - "psllq $62, %%xmm4 \n\t" // x1<<62:x0<<62 = stuff:b - "psllq $57, %%xmm5 \n\t" // x1<<57:x0<<57 = stuff:c + "movdqa %%xmm1, %%xmm3 \n\t" // x1:x0 + "movdqa %%xmm1, %%xmm4 \n\t" // same + "movdqa %%xmm1, %%xmm5 \n\t" // same + "psllq $63, %%xmm3 \n\t" // x1<<63:x0<<63 = stuff:a + "psllq $62, %%xmm4 \n\t" // x1<<62:x0<<62 = stuff:b + "psllq $57, %%xmm5 \n\t" // x1<<57:x0<<57 = stuff:c /* Step 2 (2) */ - "pxor %%xmm4, %%xmm3 \n\t" // stuff:a+b - "pxor %%xmm5, %%xmm3 \n\t" // stuff:a+b+c - "pslldq $8, %%xmm3 \n\t" // a+b+c:0 - "pxor %%xmm3, %%xmm1 \n\t" // x1+a+b+c:x0 = d:x0 + "pxor %%xmm4, %%xmm3 \n\t" // stuff:a+b + "pxor %%xmm5, %%xmm3 \n\t" // stuff:a+b+c + "pslldq $8, %%xmm3 \n\t" // a+b+c:0 + "pxor %%xmm3, %%xmm1 \n\t" // x1+a+b+c:x0 = d:x0 /* Steps 3 and 4 */ - "movdqa %%xmm1,%%xmm0 \n\t" // d:x0 - "movdqa %%xmm1,%%xmm4 \n\t" // same - "movdqa %%xmm1,%%xmm5 \n\t" // same - "psrlq $1, %%xmm0 \n\t" // e1:x0>>1 = e1:e0' - "psrlq $2, %%xmm4 \n\t" // f1:x0>>2 = f1:f0' - "psrlq $7, %%xmm5 \n\t" // g1:x0>>7 = g1:g0' - "pxor %%xmm4, %%xmm0 \n\t" // e1+f1:e0'+f0' - "pxor %%xmm5, %%xmm0 \n\t" // e1+f1+g1:e0'+f0'+g0' + "movdqa %%xmm1,%%xmm0 \n\t" // d:x0 + "movdqa %%xmm1,%%xmm4 \n\t" // same + "movdqa %%xmm1,%%xmm5 \n\t" // same + "psrlq $1, %%xmm0 \n\t" // e1:x0>>1 = e1:e0' + "psrlq $2, %%xmm4 \n\t" // f1:x0>>2 = f1:f0' + "psrlq $7, %%xmm5 \n\t" // g1:x0>>7 = g1:g0' + "pxor %%xmm4, %%xmm0 \n\t" // e1+f1:e0'+f0' + "pxor %%xmm5, %%xmm0 \n\t" // e1+f1+g1:e0'+f0'+g0' // e0'+f0'+g0' is almost e0+f0+g0, ex\tcept for some missing // bits carried from d. Now get those\t bits back in. - "movdqa %%xmm1,%%xmm3 \n\t" // d:x0 - "movdqa %%xmm1,%%xmm4 \n\t" // same - "movdqa %%xmm1,%%xmm5 \n\t" // same - "psllq $63, %%xmm3 \n\t" // d<<63:stuff - "psllq $62, %%xmm4 \n\t" // d<<62:stuff - "psllq $57, %%xmm5 \n\t" // d<<57:stuff - "pxor %%xmm4, %%xmm3 \n\t" // d<<63+d<<62:stuff - "pxor %%xmm5, %%xmm3 \n\t" // missing bits of d:stuff - "psrldq $8, %%xmm3 \n\t" // 0:missing bits of d - "pxor %%xmm3, %%xmm0 \n\t" // e1+f1+g1:e0+f0+g0 - "pxor %%xmm1, %%xmm0 \n\t" // h1:h0 - "pxor %%xmm2, %%xmm0 \n\t" // x3+h1:x2+h0 + "movdqa %%xmm1,%%xmm3 \n\t" // d:x0 + "movdqa %%xmm1,%%xmm4 \n\t" // same + "movdqa %%xmm1,%%xmm5 \n\t" // same + "psllq $63, %%xmm3 \n\t" // d<<63:stuff + "psllq $62, %%xmm4 \n\t" // d<<62:stuff + "psllq $57, %%xmm5 \n\t" // d<<57:stuff + "pxor %%xmm4, %%xmm3 \n\t" // d<<63+d<<62:stuff + "pxor %%xmm5, %%xmm3 \n\t" // missing bits of d:stuff + "psrldq $8, %%xmm3 \n\t" // 0:missing bits of d + "pxor %%xmm3, %%xmm0 \n\t" // e1+f1+g1:e0+f0+g0 + "pxor %%xmm1, %%xmm0 \n\t" // h1:h0 + "pxor %%xmm2, %%xmm0 \n\t" // x3+h1:x2+h0 - "movdqu %%xmm0, (%2) \n\t" // done + "movdqu %%xmm0, (%2) \n\t" // done : : "r" (aa), "r" (bb), "r" (cc) : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); @@ -255,8 +255,8 @@ void mbedtls_aesni_inverse_key(unsigned char *invkey, for (fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16) { asm ("movdqu (%0), %%xmm0 \n\t" - AESIMC xmm0_xmm0 "\n\t" - "movdqu %%xmm0, (%1) \n\t" + AESIMC(xmm0_xmm0) + "movdqu %%xmm0, (%1) \n\t" : : "r" (fk), "r" (ik) : "memory", "xmm0"); @@ -300,16 +300,16 @@ static void aesni_setkey_enc_128(unsigned char *rk, /* Main "loop" */ "2: \n\t" - AESKEYGENA xmm0_xmm1 ",0x01 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x02 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x04 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x08 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x10 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x20 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x40 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x80 \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x1B \n\tcall 1b \n\t" - AESKEYGENA xmm0_xmm1 ",0x36 \n\tcall 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x01") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x02") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x04") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x08") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x10") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x20") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x40") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x80") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x1B") "call 1b \n\t" + AESKEYGENA(xmm0_xmm1, "0x36") "call 1b \n\t" : : "r" (rk), "r" (key) : "memory", "cc", "0"); @@ -358,14 +358,14 @@ static void aesni_setkey_enc_192(unsigned char *rk, "ret \n\t" "2: \n\t" - AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x80 \n\tcall 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x01") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x02") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x04") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x08") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x10") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x20") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x40") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x80") "call 1b \n\t" : : "r" (rk), "r" (key) @@ -408,31 +408,31 @@ static void aesni_setkey_enc_256(unsigned char *rk, /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 ) * and proceed to generate next round key from there */ - AESKEYGENA xmm0_xmm2 ",0x00 \n\t" - "pshufd $0xaa, %%xmm2, %%xmm2 \n\t" - "pxor %%xmm1, %%xmm2 \n\t" - "pslldq $4, %%xmm1 \n\t" - "pxor %%xmm1, %%xmm2 \n\t" - "pslldq $4, %%xmm1 \n\t" - "pxor %%xmm1, %%xmm2 \n\t" - "pslldq $4, %%xmm1 \n\t" - "pxor %%xmm2, %%xmm1 \n\t" - "add $16, %0 \n\t" - "movdqu %%xmm1, (%0) \n\t" - "ret \n\t" + AESKEYGENA(xmm0_xmm2, "0x00") + "pshufd $0xaa, %%xmm2, %%xmm2 \n\t" + "pxor %%xmm1, %%xmm2 \n\t" + "pslldq $4, %%xmm1 \n\t" + "pxor %%xmm1, %%xmm2 \n\t" + "pslldq $4, %%xmm1 \n\t" + "pxor %%xmm1, %%xmm2 \n\t" + "pslldq $4, %%xmm1 \n\t" + "pxor %%xmm2, %%xmm1 \n\t" + "add $16, %0 \n\t" + "movdqu %%xmm1, (%0) \n\t" + "ret \n\t" /* * Main "loop" - Generating one more key than necessary, * see definition of mbedtls_aes_context.buf */ - "2: \n\t" - AESKEYGENA xmm1_xmm2 ",0x01 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x02 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x04 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x08 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x10 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x20 \n\tcall 1b \n\t" - AESKEYGENA xmm1_xmm2 ",0x40 \n\tcall 1b \n\t" + "2: \n\t" + AESKEYGENA(xmm1_xmm2, "0x01") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x02") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x04") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x08") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x10") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x20") "call 1b \n\t" + AESKEYGENA(xmm1_xmm2, "0x40") "call 1b \n\t" : : "r" (rk), "r" (key) : "memory", "cc", "0"); From 9af58cd7f8e08a66b3e9b11106c31ab56be7e67f Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 10 Mar 2023 22:29:32 +0100 Subject: [PATCH 29/53] New preprocessor symbol indicating that AESNI support is present The configuration symbol MBEDTLS_AESNI_C requests AESNI support, but it is ignored if the platform doesn't have AESNI. This allows keeping MBEDTLS_AESNI_C enabled (as it is in the default build) when building for platforms other than x86_64, or when MBEDTLS_HAVE_ASM is disabled. To facilitate maintenance, always use the symbol MBEDTLS_AESNI_HAVE_CODE to answer the question "can I call mbedtls_aesni_xxx functions?", rather than repeating the check `defined(MBEDTLS_AESNI_C) && ...`. Signed-off-by: Gilles Peskine --- library/aes.c | 6 +++--- library/aesni.h | 22 ++++++++++++++++++++-- library/gcm.c | 6 +++--- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/library/aes.c b/library/aes.c index 64392fc56b..15b505f8ab 100644 --- a/library/aes.c +++ b/library/aes.c @@ -541,7 +541,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key, #endif RK = ctx->buf + ctx->rk_offset; -#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits); } @@ -653,7 +653,7 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key, ctx->nr = cty.nr; -#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { mbedtls_aesni_inverse_key((unsigned char *) RK, (const unsigned char *) (cty.buf + cty.rk_offset), ctx->nr); @@ -957,7 +957,7 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, return MBEDTLS_ERR_AES_BAD_INPUT_DATA; } -#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { return mbedtls_aesni_crypt_ecb(ctx, mode, input, output); } diff --git a/library/aesni.h b/library/aesni.h index a842fb703b..c1c4bdd8f8 100644 --- a/library/aesni.h +++ b/library/aesni.h @@ -32,13 +32,30 @@ #define MBEDTLS_AESNI_AES 0x02000000u #define MBEDTLS_AESNI_CLMUL 0x00000002u -#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) && \ +#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) && \ (defined(__amd64__) || defined(__x86_64__)) && \ !defined(MBEDTLS_HAVE_X86_64) #define MBEDTLS_HAVE_X86_64 #endif +#if defined(MBEDTLS_AESNI_C) + #if defined(MBEDTLS_HAVE_X86_64) +#define MBEDTLS_AESNI_HAVE_CODE // via assembly +#endif + +#if defined(_MSC_VER) +#define MBEDTLS_HAVE_AESNI_INTRINSICS +#endif +#if defined(__GNUC__) && defined(__AES__) +#define MBEDTLS_HAVE_AESNI_INTRINSICS +#endif + +#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#define MBEDTLS_AESNI_HAVE_CODE // via intrinsics +#endif + +#if defined(MBEDTLS_AESNI_HAVE_CODE) #ifdef __cplusplus extern "C" { @@ -127,6 +144,7 @@ int mbedtls_aesni_setkey_enc(unsigned char *rk, } #endif -#endif /* MBEDTLS_HAVE_X86_64 */ +#endif /* MBEDTLS_AESNI_HAVE_CODE */ +#endif /* MBEDTLS_AESNI_C */ #endif /* MBEDTLS_AESNI_H */ diff --git a/library/gcm.c b/library/gcm.c index 6d4495fd39..8e773d7e7d 100644 --- a/library/gcm.c +++ b/library/gcm.c @@ -86,7 +86,7 @@ static int gcm_gen_table(mbedtls_gcm_context *ctx) ctx->HL[8] = vl; ctx->HH[8] = vh; -#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_AESNI_HAVE_CODE) /* With CLMUL support, we need only h, not the rest of the table */ if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) { return 0; @@ -183,7 +183,7 @@ static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16], unsigned char lo, hi, rem; uint64_t zh, zl; -#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) { unsigned char h[16]; @@ -195,7 +195,7 @@ static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16], mbedtls_aesni_gcm_mult(output, x, h); return; } -#endif /* MBEDTLS_AESNI_C && MBEDTLS_HAVE_X86_64 */ +#endif /* MBEDTLS_AESNI_HAVE_CODE */ lo = x[15] & 0xf; From 7e67bd516db0cc6a9bcfad8567f2c1b8e1424b6a Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 10 Mar 2023 22:35:24 +0100 Subject: [PATCH 30/53] AES, GCM selftest: indicate which implementation is used Signed-off-by: Gilles Peskine --- library/aes.c | 23 +++++++++++++++++++++++ library/gcm.c | 14 ++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/library/aes.c b/library/aes.c index 15b505f8ab..e41810a54d 100644 --- a/library/aes.c +++ b/library/aes.c @@ -1729,6 +1729,29 @@ int mbedtls_aes_self_test(int verbose) memset(key, 0, 32); mbedtls_aes_init(&ctx); + if (verbose != 0) { +#if defined(MBEDTLS_AES_ALT) + mbedtls_printf(" AES note: alternative implementation.\n"); +#else /* MBEDTLS_AES_ALT */ +#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) + if (mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE)) { + mbedtls_printf(" AES note: using VIA Padlock.\n"); + } else +#endif +#if defined(MBEDTLS_AESNI_HAVE_CODE) + if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { + mbedtls_printf(" AES note: using AESNI.\n"); + } else +#endif +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64) + if (mbedtls_aesce_has_support()) { + mbedtls_printf(" AES note: using AESCE.\n"); + } else +#endif + mbedtls_printf(" AES note: built-in implementation.\n"); +#endif /* MBEDTLS_AES_ALT */ + } + /* * ECB mode */ diff --git a/library/gcm.c b/library/gcm.c index 8e773d7e7d..4e8ec08815 100644 --- a/library/gcm.c +++ b/library/gcm.c @@ -845,6 +845,20 @@ int mbedtls_gcm_self_test(int verbose) mbedtls_cipher_id_t cipher = MBEDTLS_CIPHER_ID_AES; size_t olen; + if (verbose != 0) + { +#if defined(MBEDTLS_GCM_ALT) + mbedtls_printf(" GCM note: alternative implementation.\n"); +#else /* MBEDTLS_GCM_ALT */ +#if defined(MBEDTLS_AESNI_HAVE_CODE) + if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) { + mbedtls_printf(" GCM note: using AESNI.\n"); + } else +#endif + mbedtls_printf(" GCM note: built-in implementation.\n"); +#endif /* MBEDTLS_GCM_ALT */ + } + for (j = 0; j < 3; j++) { int key_len = 128 + 64 * j; From d671917d0dd1dcc498ee384dacafdbad674d7c88 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 10 Mar 2023 22:37:11 +0100 Subject: [PATCH 31/53] AESNI: add implementation with intrinsics As of this commit, to use the intrinsics for MBEDTLS_AESNI_C: * With MSVC, this should be the default. * With Clang, build with `clang -maes -mpclmul` or equivalent. * With GCC, build with `gcc -mpclmul -msse2` or equivalent. In particular, for now, with a GCC-like compiler, when building specifically for a target that supports both the AES and GCM instructions, the old implementation using assembly is selected. This method for platform selection will likely be improved in the future. Signed-off-by: Gilles Peskine --- library/aes.c | 17 +++ library/aesni.c | 339 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 355 insertions(+), 1 deletion(-) diff --git a/library/aes.c b/library/aes.c index e41810a54d..69d4eadfa8 100644 --- a/library/aes.c +++ b/library/aes.c @@ -543,6 +543,13 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key, #if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { + /* The intrinsics-based implementation needs 16-byte alignment + * for the round key array. */ + unsigned delta = (uintptr_t) ctx->buf & 0x0000000f; + if (delta != 0) { + ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t + } + RK = ctx->buf + ctx->rk_offset; return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits); } #endif @@ -643,6 +650,16 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key, if (aes_padlock_ace) { ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf; } +#endif +#if defined(MBEDTLS_AESNI_HAVE_CODE) + if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { + /* The intrinsics-based implementation needs 16-byte alignment + * for the round key array. */ + unsigned delta = (uintptr_t) ctx->buf & 0x0000000f; + if (delta != 0) { + ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t + } + } #endif RK = ctx->buf + ctx->rk_offset; diff --git a/library/aesni.c b/library/aesni.c index 77243ea387..3acc329712 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -30,7 +30,12 @@ #include -#if defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) || defined(MBEDTLS_HAVE_X86_64) + +#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#include +#include +#endif /* * AES-NI support detection routine @@ -41,17 +46,347 @@ int mbedtls_aesni_has_support(unsigned int what) static unsigned int c = 0; if (!done) { +#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) + static unsigned info[4] = { 0, 0, 0, 0 }; +#if defined(_MSC_VER) + __cpuid(info, 1); +#else + __cpuid(1, info[0], info[1], info[2], info[3]); +#endif + c = info[2]; +#else asm ("movl $1, %%eax \n\t" "cpuid \n\t" : "=c" (c) : : "eax", "ebx", "edx"); +#endif done = 1; } return (c & what) != 0; } +#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) + +/* + * AES-NI AES-ECB block en(de)cryption + */ +int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx, + int mode, + const unsigned char input[16], + unsigned char output[16]) +{ + const __m128i *rk = (const __m128i *) (ctx->buf + ctx->rk_offset); + unsigned nr = ctx->nr; // Number of remaining rounds + // Load round key 0 + __m128i xmm0; + memcpy(&xmm0, input, 16); + xmm0 ^= *rk; + ++rk; + --nr; + + if (mode == 0) { + while (nr != 0) { + xmm0 = _mm_aesdec_si128(xmm0, *rk); + ++rk; + --nr; + } + xmm0 = _mm_aesdeclast_si128(xmm0, *rk); + } else { + while (nr != 0) { + xmm0 = _mm_aesenc_si128(xmm0, *rk); + ++rk; + --nr; + } + xmm0 = _mm_aesenclast_si128(xmm0, *rk); + } + + memcpy(output, &xmm0, 16); + return 0; +} + +/* + * GCM multiplication: c = a times b in GF(2^128) + * Based on [CLMUL-WP] algorithms 1 (with equation 27) and 5. + */ + +static void gcm_clmul(const __m128i aa, const __m128i bb, + __m128i *cc, __m128i *dd) +{ + /* + * Caryless multiplication dd:cc = aa * bb + * using [CLMUL-WP] algorithm 1 (p. 12). + */ + *cc = _mm_clmulepi64_si128(aa, bb, 0x00); // a0*b0 = c1:c0 + *dd = _mm_clmulepi64_si128(aa, bb, 0x11); // a1*b1 = d1:d0 + __m128i ee = _mm_clmulepi64_si128(aa, bb, 0x10); // a0*b1 = e1:e0 + __m128i ff = _mm_clmulepi64_si128(aa, bb, 0x01); // a1*b0 = f1:f0 + ff ^= ee; // e1+f1:e0+f0 + ee = ff; // e1+f1:e0+f0 + ff = _mm_srli_si128(ff, 8); // 0:e1+f1 + ee = _mm_slli_si128(ee, 8); // e0+f0:0 + *dd ^= ff; // d1:d0+e1+f1 + *cc ^= ee; // c1+e0+f1:c0 +} + +static void gcm_shift(__m128i *cc, __m128i *dd) +{ + /* + * Now shift the result one bit to the left, + * taking advantage of [CLMUL-WP] eq 27 (p. 18) + */ + // // *cc = r1:r0 + // // *dd = r3:r2 + __m128i xmm1 = _mm_slli_epi64(*cc, 1); // r1<<1:r0<<1 + __m128i xmm2 = _mm_slli_epi64(*dd, 1); // r3<<1:r2<<1 + __m128i xmm3 = _mm_srli_epi64(*cc, 63); // r1>>63:r0>>63 + __m128i xmm4 = _mm_srli_epi64(*dd, 63); // r3>>63:r2>>63 + __m128i xmm5 = _mm_srli_si128(xmm3, 8); // 0:r1>>63 + xmm3 = _mm_slli_si128(xmm3, 8); // r0>>63:0 + xmm4 = _mm_slli_si128(xmm4, 8); // 0:r1>>63 + + *cc = xmm1 | xmm3; // r1<<1|r0>>63:r0<<1 + *dd = xmm2 | xmm4 | xmm5; // r3<<1|r2>>62:r2<<1|r1>>63 +} + +static __m128i gcm_reduce1(__m128i xx) +{ + // // xx = x1:x0 + /* [CLMUL-WP] Algorithm 5 Step 2 */ + __m128i aa = _mm_slli_epi64(xx, 63); // x1<<63:x0<<63 = stuff:a + __m128i bb = _mm_slli_epi64(xx, 62); // x1<<62:x0<<62 = stuff:b + __m128i cc = _mm_slli_epi64(xx, 57); // x1<<57:x0<<57 = stuff:c + __m128i dd = _mm_slli_si128(aa ^ bb ^ cc, 8); // a+b+c:0 + return dd ^ xx; // x1+a+b+c:x0 = d:x0 +} + +static __m128i gcm_reduce2(__m128i dx) +{ + /* [CLMUL-WP] Algorithm 5 Steps 3 and 4 */ + __m128i ee = _mm_srli_epi64(dx, 1); // e1:x0>>1 = e1:e0' + __m128i ff = _mm_srli_epi64(dx, 2); // f1:x0>>2 = f1:f0' + __m128i gg = _mm_srli_epi64(dx, 7); // g1:x0>>7 = g1:g0' + + // e0'+f0'+g0' is almost e0+f0+g0, except for some missing + // bits carried from d. Now get those bits back in. + __m128i eh = _mm_slli_epi64(dx, 63); // d<<63:stuff + __m128i fh = _mm_slli_epi64(dx, 62); // d<<62:stuff + __m128i gh = _mm_slli_epi64(dx, 57); // d<<57:stuff + __m128i hh = _mm_srli_si128(eh ^ fh ^ gh, 8); // 0:missing bits of d + + return ee ^ ff ^ gg ^ hh ^ dx; +} + +void mbedtls_aesni_gcm_mult(unsigned char c[16], + const unsigned char a[16], + const unsigned char b[16]) +{ + __m128i aa, bb, cc, dd; + + /* The inputs are in big-endian order, so byte-reverse them */ + for (size_t i = 0; i < 16; i++) { + ((uint8_t *) &aa)[i] = a[15 - i]; + ((uint8_t *) &bb)[i] = b[15 - i]; + } + + gcm_clmul(aa, bb, &cc, &dd); + gcm_shift(&cc, &dd); + /* + * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1 + * using [CLMUL-WP] algorithm 5 (p. 18). + * Currently dd:cc holds x3:x2:x1:x0 (already shifted). + */ + __m128i dx = gcm_reduce1(cc); + __m128i xh = gcm_reduce2(dx); + cc = xh ^ dd; // x3+h1:x2+h0 + + /* Now byte-reverse the outputs */ + for (size_t i = 0; i < 16; i++) { + c[i] = ((uint8_t *) &cc)[15 - i]; + } + + return; +} + +/* + * Compute decryption round keys from encryption round keys + */ +void mbedtls_aesni_inverse_key(unsigned char *invkey, + const unsigned char *fwdkey, int nr) +{ + __m128i *ik = (__m128i *) invkey; + const __m128i *fk = (const __m128i *) fwdkey + nr; + + *ik = *fk; + for (--fk, ++ik; fk > (const __m128i *) fwdkey; --fk, ++ik) { + *ik = _mm_aesimc_si128(*fk); + } + *ik = *fk; +} + +/* + * Key expansion, 128-bit case + */ +static __m128i aesni_set_rk_128(__m128i xmm0, __m128i xmm1) +{ + /* + * Finish generating the next round key. + * + * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff + * with X = rot( sub( r3 ) ) ^ RCON. + * + * On exit, xmm1 is r7:r6:r5:r4 + * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3 + * and this is returned, to be written to the round key buffer. + */ + xmm1 = _mm_shuffle_epi32(xmm1, 0xff); // X:X:X:X + xmm1 ^= xmm0; // X+r3:X+r2:X+r1:r4 + xmm0 = _mm_slli_si128(xmm0, 4); // r2:r1:r0:0 + xmm1 ^= xmm0; // X+r3+r2:X+r2+r1:r5:r4 + xmm0 = _mm_slli_si128(xmm0, 4); // r1:r0:0:0 + xmm1 ^= xmm0; // X+r3+r2+r1:r6:r5:r4 + xmm0 = _mm_slli_si128(xmm0, 4); // r0:0:0:0 + xmm1 ^= xmm0; // r7:r6:r5:r4 + return xmm1; +} + +static void aesni_setkey_enc_128(unsigned char *rk_bytes, + const unsigned char *key) +{ + __m128i *rk = (__m128i *) rk_bytes; + + memcpy(&rk[0], key, 16); + rk[1] = aesni_set_rk_128(rk[0], _mm_aeskeygenassist_si128(rk[0], 0x01)); + rk[2] = aesni_set_rk_128(rk[1], _mm_aeskeygenassist_si128(rk[1], 0x02)); + rk[3] = aesni_set_rk_128(rk[2], _mm_aeskeygenassist_si128(rk[2], 0x04)); + rk[4] = aesni_set_rk_128(rk[3], _mm_aeskeygenassist_si128(rk[3], 0x08)); + rk[5] = aesni_set_rk_128(rk[4], _mm_aeskeygenassist_si128(rk[4], 0x10)); + rk[6] = aesni_set_rk_128(rk[5], _mm_aeskeygenassist_si128(rk[5], 0x20)); + rk[7] = aesni_set_rk_128(rk[6], _mm_aeskeygenassist_si128(rk[6], 0x40)); + rk[8] = aesni_set_rk_128(rk[7], _mm_aeskeygenassist_si128(rk[7], 0x80)); + rk[9] = aesni_set_rk_128(rk[8], _mm_aeskeygenassist_si128(rk[8], 0x1B)); + rk[10] = aesni_set_rk_128(rk[9], _mm_aeskeygenassist_si128(rk[9], 0x36)); +} + +/* + * Key expansion, 192-bit case + */ +static void aesni_set_rk_192(__m128i *xmm0, __m128i *xmm1, __m128i xmm2, + unsigned char *rk) +{ + /* + * Finish generating the next 6 quarter-keys. + * + * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4 + * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON. + * + * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10 + * and those are written to the round key buffer. + */ + xmm2 = _mm_shuffle_epi32(xmm2, 0x55); // X:X:X:X + xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3:X+r2:X+r1:X+r0 + *xmm0 = _mm_slli_si128(*xmm0, 4); // r2:r1:r0:0 + xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3+r2:X+r2+r1:X+r1+r0:X+r0 + *xmm0 = _mm_slli_si128(*xmm0, 4); // r1:r0:0:0 + xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3+r2+r1:X+r2+r1+r0:X+r1+r0:X+r0 + *xmm0 = _mm_slli_si128(*xmm0, 4); // r0:0:0:0 + xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3+r2+r1+r0:X+r2+r1+r0:X+r1+r0:X+r0 + *xmm0 = xmm2; // = r9:r8:r7:r6 + + xmm2 = _mm_shuffle_epi32(xmm2, 0xff); // r9:r9:r9:r9 + xmm2 = _mm_xor_si128(xmm2, *xmm1); // stuff:stuff:r9+r5:r9+r4 + *xmm1 = _mm_slli_si128(*xmm1, 4); // stuff:stuff:r4:0 + xmm2 = _mm_xor_si128(xmm2, *xmm1); // stuff:stuff:r9+r5+r4:r9+r4 + *xmm1 = xmm2; // = stuff:stuff:r11:r10 + + /* Store xmm0 and the low half of xmm1 into rk, which is conceptually + * an array of 24-byte elements. Since 24 is not a multiple of 16, + * rk is not necessarily aligned so just `*rk = *xmm0` doesn't work. */ + memcpy(rk, xmm0, 16); + _mm_storeu_si64(rk + 16, *xmm1); +} + +static void aesni_setkey_enc_192(unsigned char *rk, + const unsigned char *key) +{ + /* First round: use original key */ + memcpy(rk, key, 24); + /* aes.c guarantees that rk is aligned on a 16-byte boundary. */ + __m128i xmm0 = ((__m128i *) rk)[0]; + __m128i xmm1 = _mm_loadl_epi64(((__m128i *) rk) + 1); + + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x01), rk + 24 * 1); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x02), rk + 24 * 2); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x04), rk + 24 * 3); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x08), rk + 24 * 4); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x10), rk + 24 * 5); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x20), rk + 24 * 6); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x40), rk + 24 * 7); + aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x80), rk + 24 * 8); +} + +/* + * Key expansion, 256-bit case + */ +static void aesni_set_rk_256(__m128i xmm0, __m128i xmm1, __m128i xmm2, + __m128i *rk0, __m128i *rk1) +{ + /* + * Finish generating the next two round keys. + * + * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and + * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON + * + * On exit, *rk0 is r11:r10:r9:r8 and *rk1 is r15:r14:r13:r12 + */ + xmm2 = _mm_shuffle_epi32(xmm2, 0xff); + xmm2 ^= xmm0; + xmm0 = _mm_slli_si128(xmm0, 4); + xmm2 ^= xmm0; + xmm0 = _mm_slli_si128(xmm0, 4); + xmm2 ^= xmm0; + xmm0 = _mm_slli_si128(xmm0, 4); + xmm0 ^= xmm2; + *rk0 = xmm0; + + /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 ) + * and proceed to generate next round key from there */ + xmm2 = _mm_aeskeygenassist_si128(xmm0, 0x00); + xmm2 = _mm_shuffle_epi32(xmm2, 0xaa); + xmm2 ^= xmm1; + xmm1 = _mm_slli_si128(xmm1, 4); + xmm2 ^= xmm1; + xmm1 = _mm_slli_si128(xmm1, 4); + xmm2 ^= xmm1; + xmm1 = _mm_slli_si128(xmm1, 4); + xmm1 ^= xmm2; + *rk1 = xmm1; +} + +static void aesni_setkey_enc_256(unsigned char *rk_bytes, + const unsigned char *key) +{ + __m128i *rk = (__m128i *) rk_bytes; + + memcpy(&rk[0], key, 16); + memcpy(&rk[1], key + 16, 16); + + /* + * Main "loop" - Generating one more key than necessary, + * see definition of mbedtls_aes_context.buf + */ + aesni_set_rk_256(rk[0], rk[1], _mm_aeskeygenassist_si128(rk[1], 0x01), &rk[2], &rk[3]); + aesni_set_rk_256(rk[2], rk[3], _mm_aeskeygenassist_si128(rk[3], 0x02), &rk[4], &rk[5]); + aesni_set_rk_256(rk[4], rk[5], _mm_aeskeygenassist_si128(rk[5], 0x04), &rk[6], &rk[7]); + aesni_set_rk_256(rk[6], rk[7], _mm_aeskeygenassist_si128(rk[7], 0x08), &rk[8], &rk[9]); + aesni_set_rk_256(rk[8], rk[9], _mm_aeskeygenassist_si128(rk[9], 0x10), &rk[10], &rk[11]); + aesni_set_rk_256(rk[10], rk[11], _mm_aeskeygenassist_si128(rk[11], 0x20), &rk[12], &rk[13]); + aesni_set_rk_256(rk[12], rk[13], _mm_aeskeygenassist_si128(rk[13], 0x40), &rk[14], &rk[15]); +} + +#else /* MBEDTLS_HAVE_AESNI_INTRINSICS */ + #if defined(__has_feature) #if __has_feature(memory_sanitizer) #warning \ @@ -438,6 +773,8 @@ static void aesni_setkey_enc_256(unsigned char *rk, : "memory", "cc", "0"); } +#endif /* MBEDTLS_HAVE_AESNI_INTRINSICS */ + /* * Key expansion, wrapper */ From 02edb7546fc1bea36cece9738b51b6c78c1e6a94 Mon Sep 17 00:00:00 2001 From: Tom Cosgrove Date: Mon, 13 Mar 2023 15:32:52 +0000 Subject: [PATCH 32/53] Get aesni.c compiling with Visual Studio Clang is nice enough to support bitwise operators on __m128i, but MSVC isn't. Also, __cpuid() in MSVC comes from (which is included via ), not . Signed-off-by: Tom Cosgrove --- library/aesni.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/library/aesni.c b/library/aesni.c index 3acc329712..30b4b17ca1 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -33,7 +33,9 @@ #if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) || defined(MBEDTLS_HAVE_X86_64) #if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#if !defined(_WIN32) #include +#endif #include #endif @@ -79,10 +81,11 @@ int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx, { const __m128i *rk = (const __m128i *) (ctx->buf + ctx->rk_offset); unsigned nr = ctx->nr; // Number of remaining rounds + // Load round key 0 __m128i xmm0; memcpy(&xmm0, input, 16); - xmm0 ^= *rk; + xmm0 = _mm_xor_si128(xmm0, rk[0]); // xmm0 ^= *rk; ++rk; --nr; @@ -122,12 +125,12 @@ static void gcm_clmul(const __m128i aa, const __m128i bb, *dd = _mm_clmulepi64_si128(aa, bb, 0x11); // a1*b1 = d1:d0 __m128i ee = _mm_clmulepi64_si128(aa, bb, 0x10); // a0*b1 = e1:e0 __m128i ff = _mm_clmulepi64_si128(aa, bb, 0x01); // a1*b0 = f1:f0 - ff ^= ee; // e1+f1:e0+f0 + ff = _mm_xor_si128(ff, ee); // e1+f1:e0+f0 ee = ff; // e1+f1:e0+f0 ff = _mm_srli_si128(ff, 8); // 0:e1+f1 ee = _mm_slli_si128(ee, 8); // e0+f0:0 - *dd ^= ff; // d1:d0+e1+f1 - *cc ^= ee; // c1+e0+f1:c0 + *dd = _mm_xor_si128(*dd, ff); // d1:d0+e1+f1 + *cc = _mm_xor_si128(*cc, ee); // c1+e0+f1:c0 } static void gcm_shift(__m128i *cc, __m128i *dd) @@ -146,8 +149,8 @@ static void gcm_shift(__m128i *cc, __m128i *dd) xmm3 = _mm_slli_si128(xmm3, 8); // r0>>63:0 xmm4 = _mm_slli_si128(xmm4, 8); // 0:r1>>63 - *cc = xmm1 | xmm3; // r1<<1|r0>>63:r0<<1 - *dd = xmm2 | xmm4 | xmm5; // r3<<1|r2>>62:r2<<1|r1>>63 + *cc = _mm_or_si128(xmm1, xmm3); // r1<<1|r0>>63:r0<<1 + *dd = _mm_or_si128(_mm_or_si128(xmm2, xmm4), xmm5); // r3<<1|r2>>62:r2<<1|r1>>63 } static __m128i gcm_reduce1(__m128i xx) @@ -157,8 +160,8 @@ static __m128i gcm_reduce1(__m128i xx) __m128i aa = _mm_slli_epi64(xx, 63); // x1<<63:x0<<63 = stuff:a __m128i bb = _mm_slli_epi64(xx, 62); // x1<<62:x0<<62 = stuff:b __m128i cc = _mm_slli_epi64(xx, 57); // x1<<57:x0<<57 = stuff:c - __m128i dd = _mm_slli_si128(aa ^ bb ^ cc, 8); // a+b+c:0 - return dd ^ xx; // x1+a+b+c:x0 = d:x0 + __m128i dd = _mm_slli_si128(_mm_xor_si128(_mm_xor_si128(aa, bb), cc), 8); // a+b+c:0 + return _mm_xor_si128(dd, xx); // x1+a+b+c:x0 = d:x0 } static __m128i gcm_reduce2(__m128i dx) @@ -173,9 +176,9 @@ static __m128i gcm_reduce2(__m128i dx) __m128i eh = _mm_slli_epi64(dx, 63); // d<<63:stuff __m128i fh = _mm_slli_epi64(dx, 62); // d<<62:stuff __m128i gh = _mm_slli_epi64(dx, 57); // d<<57:stuff - __m128i hh = _mm_srli_si128(eh ^ fh ^ gh, 8); // 0:missing bits of d + __m128i hh = _mm_srli_si128(_mm_xor_si128(_mm_xor_si128(eh, fh), gh), 8); // 0:missing bits of d - return ee ^ ff ^ gg ^ hh ^ dx; + return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(_mm_xor_si128(ee, ff), gg), hh), dx); } void mbedtls_aesni_gcm_mult(unsigned char c[16], @@ -199,7 +202,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], */ __m128i dx = gcm_reduce1(cc); __m128i xh = gcm_reduce2(dx); - cc = xh ^ dd; // x3+h1:x2+h0 + cc = _mm_xor_si128(xh, dd); // x3+h1:x2+h0 /* Now byte-reverse the outputs */ for (size_t i = 0; i < 16; i++) { @@ -241,13 +244,13 @@ static __m128i aesni_set_rk_128(__m128i xmm0, __m128i xmm1) * and this is returned, to be written to the round key buffer. */ xmm1 = _mm_shuffle_epi32(xmm1, 0xff); // X:X:X:X - xmm1 ^= xmm0; // X+r3:X+r2:X+r1:r4 + xmm1 = _mm_xor_si128(xmm1, xmm0); // X+r3:X+r2:X+r1:r4 xmm0 = _mm_slli_si128(xmm0, 4); // r2:r1:r0:0 - xmm1 ^= xmm0; // X+r3+r2:X+r2+r1:r5:r4 + xmm1 = _mm_xor_si128(xmm1, xmm0); // X+r3+r2:X+r2+r1:r5:r4 xmm0 = _mm_slli_si128(xmm0, 4); // r1:r0:0:0 - xmm1 ^= xmm0; // X+r3+r2+r1:r6:r5:r4 + xmm1 = _mm_xor_si128(xmm1, xmm0); // X+r3+r2+r1:r6:r5:r4 xmm0 = _mm_slli_si128(xmm0, 4); // r0:0:0:0 - xmm1 ^= xmm0; // r7:r6:r5:r4 + xmm1 = _mm_xor_si128(xmm1, xmm0); // r7:r6:r5:r4 return xmm1; } @@ -341,26 +344,26 @@ static void aesni_set_rk_256(__m128i xmm0, __m128i xmm1, __m128i xmm2, * On exit, *rk0 is r11:r10:r9:r8 and *rk1 is r15:r14:r13:r12 */ xmm2 = _mm_shuffle_epi32(xmm2, 0xff); - xmm2 ^= xmm0; + xmm2 = _mm_xor_si128(xmm2, xmm0); xmm0 = _mm_slli_si128(xmm0, 4); - xmm2 ^= xmm0; + xmm2 = _mm_xor_si128(xmm2, xmm0); xmm0 = _mm_slli_si128(xmm0, 4); - xmm2 ^= xmm0; + xmm2 = _mm_xor_si128(xmm2, xmm0); xmm0 = _mm_slli_si128(xmm0, 4); - xmm0 ^= xmm2; + xmm0 = _mm_xor_si128(xmm0, xmm2); *rk0 = xmm0; /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 ) * and proceed to generate next round key from there */ xmm2 = _mm_aeskeygenassist_si128(xmm0, 0x00); xmm2 = _mm_shuffle_epi32(xmm2, 0xaa); - xmm2 ^= xmm1; + xmm2 = _mm_xor_si128(xmm2, xmm1); xmm1 = _mm_slli_si128(xmm1, 4); - xmm2 ^= xmm1; + xmm2 = _mm_xor_si128(xmm2, xmm1); xmm1 = _mm_slli_si128(xmm1, 4); - xmm2 ^= xmm1; + xmm2 = _mm_xor_si128(xmm2, xmm1); xmm1 = _mm_slli_si128(xmm1, 4); - xmm1 ^= xmm2; + xmm1 = _mm_xor_si128(xmm1, xmm2); *rk1 = xmm1; } From dafeee48141787c8cf160763cdbdbb16eab897c5 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 15 Mar 2023 20:37:57 +0100 Subject: [PATCH 33/53] Improve variable names To some extent anyway. Signed-off-by: Gilles Peskine --- library/aesni.c | 190 ++++++++++++++++++++++++------------------------ 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/library/aesni.c b/library/aesni.c index 30b4b17ca1..c52e63c2f4 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -83,29 +83,29 @@ int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx, unsigned nr = ctx->nr; // Number of remaining rounds // Load round key 0 - __m128i xmm0; - memcpy(&xmm0, input, 16); - xmm0 = _mm_xor_si128(xmm0, rk[0]); // xmm0 ^= *rk; + __m128i state; + memcpy(&state, input, 16); + state = _mm_xor_si128(state, rk[0]); // state ^= *rk; ++rk; --nr; if (mode == 0) { while (nr != 0) { - xmm0 = _mm_aesdec_si128(xmm0, *rk); + state = _mm_aesdec_si128(state, *rk); ++rk; --nr; } - xmm0 = _mm_aesdeclast_si128(xmm0, *rk); + state = _mm_aesdeclast_si128(state, *rk); } else { while (nr != 0) { - xmm0 = _mm_aesenc_si128(xmm0, *rk); + state = _mm_aesenc_si128(state, *rk); ++rk; --nr; } - xmm0 = _mm_aesenclast_si128(xmm0, *rk); + state = _mm_aesenclast_si128(state, *rk); } - memcpy(output, &xmm0, 16); + memcpy(output, &state, 16); return 0; } @@ -135,25 +135,23 @@ static void gcm_clmul(const __m128i aa, const __m128i bb, static void gcm_shift(__m128i *cc, __m128i *dd) { - /* - * Now shift the result one bit to the left, - * taking advantage of [CLMUL-WP] eq 27 (p. 18) - */ - // // *cc = r1:r0 - // // *dd = r3:r2 - __m128i xmm1 = _mm_slli_epi64(*cc, 1); // r1<<1:r0<<1 - __m128i xmm2 = _mm_slli_epi64(*dd, 1); // r3<<1:r2<<1 - __m128i xmm3 = _mm_srli_epi64(*cc, 63); // r1>>63:r0>>63 - __m128i xmm4 = _mm_srli_epi64(*dd, 63); // r3>>63:r2>>63 - __m128i xmm5 = _mm_srli_si128(xmm3, 8); // 0:r1>>63 - xmm3 = _mm_slli_si128(xmm3, 8); // r0>>63:0 - xmm4 = _mm_slli_si128(xmm4, 8); // 0:r1>>63 + /* [CMUCL-WP] Algorithm 5 Step 1: shift cc:dd one bit to the left, + * taking advantage of [CLMUL-WP] eq 27 (p. 18). */ + // // *cc = r1:r0 + // // *dd = r3:r2 + __m128i cc_lo = _mm_slli_epi64(*cc, 1); // r1<<1:r0<<1 + __m128i dd_lo = _mm_slli_epi64(*dd, 1); // r3<<1:r2<<1 + __m128i cc_hi = _mm_srli_epi64(*cc, 63); // r1>>63:r0>>63 + __m128i dd_hi = _mm_srli_epi64(*dd, 63); // r3>>63:r2>>63 + __m128i xmm5 = _mm_srli_si128(cc_hi, 8); // 0:r1>>63 + cc_hi = _mm_slli_si128(cc_hi, 8); // r0>>63:0 + dd_hi = _mm_slli_si128(dd_hi, 8); // 0:r1>>63 - *cc = _mm_or_si128(xmm1, xmm3); // r1<<1|r0>>63:r0<<1 - *dd = _mm_or_si128(_mm_or_si128(xmm2, xmm4), xmm5); // r3<<1|r2>>62:r2<<1|r1>>63 + *cc = _mm_or_si128(cc_lo, cc_hi); // r1<<1|r0>>63:r0<<1 + *dd = _mm_or_si128(_mm_or_si128(dd_lo, dd_hi), xmm5); // r3<<1|r2>>62:r2<<1|r1>>63 } -static __m128i gcm_reduce1(__m128i xx) +static __m128i gcm_reduce(__m128i xx) { // // xx = x1:x0 /* [CLMUL-WP] Algorithm 5 Step 2 */ @@ -164,7 +162,7 @@ static __m128i gcm_reduce1(__m128i xx) return _mm_xor_si128(dd, xx); // x1+a+b+c:x0 = d:x0 } -static __m128i gcm_reduce2(__m128i dx) +static __m128i gcm_mix(__m128i dx) { /* [CLMUL-WP] Algorithm 5 Steps 3 and 4 */ __m128i ee = _mm_srli_epi64(dx, 1); // e1:x0>>1 = e1:e0' @@ -200,8 +198,8 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16], * using [CLMUL-WP] algorithm 5 (p. 18). * Currently dd:cc holds x3:x2:x1:x0 (already shifted). */ - __m128i dx = gcm_reduce1(cc); - __m128i xh = gcm_reduce2(dx); + __m128i dx = gcm_reduce(cc); + __m128i xh = gcm_mix(dx); cc = _mm_xor_si128(xh, dd); // x3+h1:x2+h0 /* Now byte-reverse the outputs */ @@ -231,27 +229,27 @@ void mbedtls_aesni_inverse_key(unsigned char *invkey, /* * Key expansion, 128-bit case */ -static __m128i aesni_set_rk_128(__m128i xmm0, __m128i xmm1) +static __m128i aesni_set_rk_128(__m128i state, __m128i xword) { /* * Finish generating the next round key. * - * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff - * with X = rot( sub( r3 ) ) ^ RCON. + * On entry state is r3:r2:r1:r0 and xword is X:stuff:stuff:stuff + * with X = rot( sub( r3 ) ) ^ RCON (obtained with AESKEYGENASSIST). * - * On exit, xmm1 is r7:r6:r5:r4 + * On exit, xword is r7:r6:r5:r4 * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3 * and this is returned, to be written to the round key buffer. */ - xmm1 = _mm_shuffle_epi32(xmm1, 0xff); // X:X:X:X - xmm1 = _mm_xor_si128(xmm1, xmm0); // X+r3:X+r2:X+r1:r4 - xmm0 = _mm_slli_si128(xmm0, 4); // r2:r1:r0:0 - xmm1 = _mm_xor_si128(xmm1, xmm0); // X+r3+r2:X+r2+r1:r5:r4 - xmm0 = _mm_slli_si128(xmm0, 4); // r1:r0:0:0 - xmm1 = _mm_xor_si128(xmm1, xmm0); // X+r3+r2+r1:r6:r5:r4 - xmm0 = _mm_slli_si128(xmm0, 4); // r0:0:0:0 - xmm1 = _mm_xor_si128(xmm1, xmm0); // r7:r6:r5:r4 - return xmm1; + xword = _mm_shuffle_epi32(xword, 0xff); // X:X:X:X + xword = _mm_xor_si128(xword, state); // X+r3:X+r2:X+r1:r4 + state = _mm_slli_si128(state, 4); // r2:r1:r0:0 + xword = _mm_xor_si128(xword, state); // X+r3+r2:X+r2+r1:r5:r4 + state = _mm_slli_si128(state, 4); // r1:r0:0:0 + xword = _mm_xor_si128(xword, state); // X+r3+r2+r1:r6:r5:r4 + state = _mm_slli_si128(state, 4); // r0:0:0:0 + state = _mm_xor_si128(xword, state); // r7:r6:r5:r4 + return state; } static void aesni_setkey_enc_128(unsigned char *rk_bytes, @@ -275,39 +273,40 @@ static void aesni_setkey_enc_128(unsigned char *rk_bytes, /* * Key expansion, 192-bit case */ -static void aesni_set_rk_192(__m128i *xmm0, __m128i *xmm1, __m128i xmm2, +static void aesni_set_rk_192(__m128i *state0, __m128i *state1, __m128i xword, unsigned char *rk) { /* * Finish generating the next 6 quarter-keys. * - * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4 - * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON. + * On entry state0 is r3:r2:r1:r0, state1 is stuff:stuff:r5:r4 + * and xword is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON + * (obtained with AESKEYGENASSIST). * - * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10 + * On exit, state0 is r9:r8:r7:r6 and state1 is stuff:stuff:r11:r10 * and those are written to the round key buffer. */ - xmm2 = _mm_shuffle_epi32(xmm2, 0x55); // X:X:X:X - xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3:X+r2:X+r1:X+r0 - *xmm0 = _mm_slli_si128(*xmm0, 4); // r2:r1:r0:0 - xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3+r2:X+r2+r1:X+r1+r0:X+r0 - *xmm0 = _mm_slli_si128(*xmm0, 4); // r1:r0:0:0 - xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3+r2+r1:X+r2+r1+r0:X+r1+r0:X+r0 - *xmm0 = _mm_slli_si128(*xmm0, 4); // r0:0:0:0 - xmm2 = _mm_xor_si128(xmm2, *xmm0); // X+r3+r2+r1+r0:X+r2+r1+r0:X+r1+r0:X+r0 - *xmm0 = xmm2; // = r9:r8:r7:r6 + xword = _mm_shuffle_epi32(xword, 0x55); // X:X:X:X + xword = _mm_xor_si128(xword, *state0); // X+r3:X+r2:X+r1:X+r0 + *state0 = _mm_slli_si128(*state0, 4); // r2:r1:r0:0 + xword = _mm_xor_si128(xword, *state0); // X+r3+r2:X+r2+r1:X+r1+r0:X+r0 + *state0 = _mm_slli_si128(*state0, 4); // r1:r0:0:0 + xword = _mm_xor_si128(xword, *state0); // X+r3+r2+r1:X+r2+r1+r0:X+r1+r0:X+r0 + *state0 = _mm_slli_si128(*state0, 4); // r0:0:0:0 + xword = _mm_xor_si128(xword, *state0); // X+r3+r2+r1+r0:X+r2+r1+r0:X+r1+r0:X+r0 + *state0 = xword; // = r9:r8:r7:r6 - xmm2 = _mm_shuffle_epi32(xmm2, 0xff); // r9:r9:r9:r9 - xmm2 = _mm_xor_si128(xmm2, *xmm1); // stuff:stuff:r9+r5:r9+r4 - *xmm1 = _mm_slli_si128(*xmm1, 4); // stuff:stuff:r4:0 - xmm2 = _mm_xor_si128(xmm2, *xmm1); // stuff:stuff:r9+r5+r4:r9+r4 - *xmm1 = xmm2; // = stuff:stuff:r11:r10 + xword = _mm_shuffle_epi32(xword, 0xff); // r9:r9:r9:r9 + xword = _mm_xor_si128(xword, *state1); // stuff:stuff:r9+r5:r9+r4 + *state1 = _mm_slli_si128(*state1, 4); // stuff:stuff:r4:0 + xword = _mm_xor_si128(xword, *state1); // stuff:stuff:r9+r5+r4:r9+r4 + *state1 = xword; // = stuff:stuff:r11:r10 - /* Store xmm0 and the low half of xmm1 into rk, which is conceptually + /* Store state0 and the low half of state1 into rk, which is conceptually * an array of 24-byte elements. Since 24 is not a multiple of 16, - * rk is not necessarily aligned so just `*rk = *xmm0` doesn't work. */ - memcpy(rk, xmm0, 16); - _mm_storeu_si64(rk + 16, *xmm1); + * rk is not necessarily aligned so just `*rk = *state0` doesn't work. */ + memcpy(rk, state0, 16); + _mm_storeu_si64(rk + 16, *state1); } static void aesni_setkey_enc_192(unsigned char *rk, @@ -316,55 +315,56 @@ static void aesni_setkey_enc_192(unsigned char *rk, /* First round: use original key */ memcpy(rk, key, 24); /* aes.c guarantees that rk is aligned on a 16-byte boundary. */ - __m128i xmm0 = ((__m128i *) rk)[0]; - __m128i xmm1 = _mm_loadl_epi64(((__m128i *) rk) + 1); + __m128i state0 = ((__m128i *) rk)[0]; + __m128i state1 = _mm_loadl_epi64(((__m128i *) rk) + 1); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x01), rk + 24 * 1); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x02), rk + 24 * 2); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x04), rk + 24 * 3); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x08), rk + 24 * 4); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x10), rk + 24 * 5); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x20), rk + 24 * 6); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x40), rk + 24 * 7); - aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x80), rk + 24 * 8); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x01), rk + 24 * 1); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x02), rk + 24 * 2); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x04), rk + 24 * 3); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x08), rk + 24 * 4); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x10), rk + 24 * 5); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x20), rk + 24 * 6); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x40), rk + 24 * 7); + aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x80), rk + 24 * 8); } /* * Key expansion, 256-bit case */ -static void aesni_set_rk_256(__m128i xmm0, __m128i xmm1, __m128i xmm2, +static void aesni_set_rk_256(__m128i state0, __m128i state1, __m128i xword, __m128i *rk0, __m128i *rk1) { /* * Finish generating the next two round keys. * - * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and - * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON + * On entry state0 is r3:r2:r1:r0, state1 is r7:r6:r5:r4 and + * xword is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON + * (obtained with AESKEYGENASSIST). * * On exit, *rk0 is r11:r10:r9:r8 and *rk1 is r15:r14:r13:r12 */ - xmm2 = _mm_shuffle_epi32(xmm2, 0xff); - xmm2 = _mm_xor_si128(xmm2, xmm0); - xmm0 = _mm_slli_si128(xmm0, 4); - xmm2 = _mm_xor_si128(xmm2, xmm0); - xmm0 = _mm_slli_si128(xmm0, 4); - xmm2 = _mm_xor_si128(xmm2, xmm0); - xmm0 = _mm_slli_si128(xmm0, 4); - xmm0 = _mm_xor_si128(xmm0, xmm2); - *rk0 = xmm0; + xword = _mm_shuffle_epi32(xword, 0xff); + xword = _mm_xor_si128(xword, state0); + state0 = _mm_slli_si128(state0, 4); + xword = _mm_xor_si128(xword, state0); + state0 = _mm_slli_si128(state0, 4); + xword = _mm_xor_si128(xword, state0); + state0 = _mm_slli_si128(state0, 4); + state0 = _mm_xor_si128(state0, xword); + *rk0 = state0; - /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 ) + /* Set xword to stuff:Y:stuff:stuff with Y = subword( r11 ) * and proceed to generate next round key from there */ - xmm2 = _mm_aeskeygenassist_si128(xmm0, 0x00); - xmm2 = _mm_shuffle_epi32(xmm2, 0xaa); - xmm2 = _mm_xor_si128(xmm2, xmm1); - xmm1 = _mm_slli_si128(xmm1, 4); - xmm2 = _mm_xor_si128(xmm2, xmm1); - xmm1 = _mm_slli_si128(xmm1, 4); - xmm2 = _mm_xor_si128(xmm2, xmm1); - xmm1 = _mm_slli_si128(xmm1, 4); - xmm1 = _mm_xor_si128(xmm1, xmm2); - *rk1 = xmm1; + xword = _mm_aeskeygenassist_si128(state0, 0x00); + xword = _mm_shuffle_epi32(xword, 0xaa); + xword = _mm_xor_si128(xword, state1); + state1 = _mm_slli_si128(state1, 4); + xword = _mm_xor_si128(xword, state1); + state1 = _mm_slli_si128(state1, 4); + xword = _mm_xor_si128(xword, state1); + state1 = _mm_slli_si128(state1, 4); + state1 = _mm_xor_si128(state1, xword); + *rk1 = state1; } static void aesni_setkey_enc_256(unsigned char *rk_bytes, From dde3c6532ef74a76a0837c22eb0df7e63d5858de Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 15 Mar 2023 23:16:27 +0100 Subject: [PATCH 34/53] Fix MSVC portability MSVC doesn't have _mm_storeu_si64. Fortunately it isn't really needed here. Signed-off-by: Gilles Peskine --- library/aesni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/aesni.c b/library/aesni.c index c52e63c2f4..e6143ee5ce 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -306,7 +306,7 @@ static void aesni_set_rk_192(__m128i *state0, __m128i *state1, __m128i xword, * an array of 24-byte elements. Since 24 is not a multiple of 16, * rk is not necessarily aligned so just `*rk = *state0` doesn't work. */ memcpy(rk, state0, 16); - _mm_storeu_si64(rk + 16, *state1); + memcpy(rk + 16, state1, 8); } static void aesni_setkey_enc_192(unsigned char *rk, From 215517667fad69309ede6da4566b35af79fec7c4 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 15 Mar 2023 23:20:26 +0100 Subject: [PATCH 35/53] Travis: run selftest on Windows Signed-off-by: Gilles Peskine --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index cdb79d1aa2..ace1e5c060 100644 --- a/.travis.yml +++ b/.travis.yml @@ -79,6 +79,7 @@ jobs: # Logs appear out of sequence on Windows. Give time to catch up. - sleep 5 - scripts/windows_msbuild.bat v141 # Visual Studio 2017 + - visualc/VS2013/x64/Release/selftest.exe - name: full configuration on arm64 os: linux From 0cd9ab71075947e65d41370b62d3c5536ceaa331 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 13:06:14 +0100 Subject: [PATCH 36/53] Fix code style Signed-off-by: Gilles Peskine --- library/gcm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/library/gcm.c b/library/gcm.c index 4e8ec08815..968e7175fb 100644 --- a/library/gcm.c +++ b/library/gcm.c @@ -845,8 +845,7 @@ int mbedtls_gcm_self_test(int verbose) mbedtls_cipher_id_t cipher = MBEDTLS_CIPHER_ID_AES; size_t olen; - if (verbose != 0) - { + if (verbose != 0) { #if defined(MBEDTLS_GCM_ALT) mbedtls_printf(" GCM note: alternative implementation.\n"); #else /* MBEDTLS_GCM_ALT */ From d0185f78c0bd5d23064a8dabe9b2d9c0f7c6805f Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 13:08:18 +0100 Subject: [PATCH 37/53] Fix typo in comment Signed-off-by: Gilles Peskine --- library/aesni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/aesni.c b/library/aesni.c index e6143ee5ce..e24527c826 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -130,7 +130,7 @@ static void gcm_clmul(const __m128i aa, const __m128i bb, ff = _mm_srli_si128(ff, 8); // 0:e1+f1 ee = _mm_slli_si128(ee, 8); // e0+f0:0 *dd = _mm_xor_si128(*dd, ff); // d1:d0+e1+f1 - *cc = _mm_xor_si128(*cc, ee); // c1+e0+f1:c0 + *cc = _mm_xor_si128(*cc, ee); // c1+e0+f0:c0 } static void gcm_shift(__m128i *cc, __m128i *dd) From 148cad134a740f5f72f4c7376ae37bc5befeab28 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 13:08:42 +0100 Subject: [PATCH 38/53] Fix unaligned access if the context is moved during operation Signed-off-by: Gilles Peskine --- library/aes.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/library/aes.c b/library/aes.c index 69d4eadfa8..1f3d8ad91d 100644 --- a/library/aes.c +++ b/library/aes.c @@ -962,6 +962,35 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx, } #endif /* !MBEDTLS_AES_DECRYPT_ALT */ +#if defined(MBEDTLS_AESNI_HAVE_CODE) || \ + (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) +/* VIA Padlock and our intrinsics-based implementation of AESNI require + * the round keys to be aligned on a 16-byte boundary. We take care of this + * before creating them, but the AES context may have moved (this can happen + * if the library is called from a language with managed memory), and in later + * calls it might have a different alignment with respect to 16-byte memory. + * So we may need to realign. + */ +static void aes_maybe_realign(mbedtls_aes_context *ctx) +{ + /* We want a 16-byte alignment. Note that buf is a pointer to uint32_t + * and rk_offset is in units of uint32_t words = 4 bytes. We want a + * 4-word alignment. */ + uintptr_t current_address = (uintptr_t) (ctx->buf + ctx->rk_offset); + unsigned current_alignment = (current_address & 0x0000000f) / 4; + if (current_alignment != 0) { + unsigned new_offset = ctx->rk_offset + 4 - current_alignment; + if (new_offset >= 4) { + new_offset -= 4; + } + memmove(ctx->buf + new_offset, // new address + ctx->buf + ctx->rk_offset, // current address + (ctx->nr + 1) * 16); // number of round keys * bytes per rk + ctx->rk_offset = new_offset; + } +} +#endif + /* * AES-ECB block encryption/decryption */ @@ -976,6 +1005,7 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, #if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { + aes_maybe_realign(ctx); return mbedtls_aesni_crypt_ecb(ctx, mode, input, output); } #endif @@ -988,13 +1018,8 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) if (aes_padlock_ace > 0) { - if (mbedtls_padlock_xcryptecb(ctx, mode, input, output) == 0) { - return 0; - } - - // If padlock data misaligned, we just fall back to - // unaccelerated mode - // + aes_maybe_realign(ctx); + return mbedtls_padlock_xcryptecb(ctx, mode, input, output); } #endif From d50cfddfd7a089528983c95afbc4b01b81832de0 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 14:25:58 +0100 Subject: [PATCH 39/53] AES context copy test: clean up Don't use hexcmp to compare binary data. Improve readability. Signed-off-by: Gilles Peskine --- tests/suites/test_suite_aes.function | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function index d95503ad8c..332b9a7948 100644 --- a/tests/suites/test_suite_aes.function +++ b/tests/suites/test_suite_aes.function @@ -468,32 +468,38 @@ void aes_misc_params() /* END_CASE */ /* BEGIN_CASE */ -void aes_ecb_copy_context(data_t *key_str, data_t *src_str) +void aes_ecb_copy_context(data_t *key, data_t *src) { unsigned char output1[16], output2[16], plain[16]; mbedtls_aes_context ctx1, ctx2, ctx3; + TEST_EQUAL(src->len, 16); + // Set key and encrypt with original context mbedtls_aes_init(&ctx1); - TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx1, key_str->x, - key_str->len * 8) == 0); + TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx1, key->x, + key->len * 8) == 0); TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx1, MBEDTLS_AES_ENCRYPT, - src_str->x, output1) == 0); - + src->x, output1) == 0); ctx2 = ctx1; - TEST_ASSERT(mbedtls_aes_setkey_dec(&ctx1, key_str->x, - key_str->len * 8) == 0); + + // Set key for decryption with original context + TEST_ASSERT(mbedtls_aes_setkey_dec(&ctx1, key->x, + key->len * 8) == 0); ctx3 = ctx1; + + // Wipe the original context to make sure nothing from it is used memset(&ctx1, 0, sizeof(ctx1)); - // Encrypt and decrypt with copied context + // Encrypt with copied context TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx2, MBEDTLS_AES_ENCRYPT, - src_str->x, output2) == 0); + src->x, output2) == 0); + ASSERT_COMPARE(output1, 16, output2, 16); + + // Decrypt with copied context TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx3, MBEDTLS_AES_DECRYPT, output1, plain) == 0); - - TEST_ASSERT(mbedtls_test_hexcmp(output1, output2, 16, 16) == 0); - TEST_ASSERT(mbedtls_test_hexcmp(src_str->x, plain, src_str->len, 16) == 0); + ASSERT_COMPARE(src->x, 16, plain, 16); } /* END_CASE */ From f99ec202d7dc5c3ba4ee73dc56b6862c97133634 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 14:26:47 +0100 Subject: [PATCH 40/53] AES context copy test: have one for each key size Don't use all-bytes zero as a string, it's harder to debug. This commit uses the test vectors from FIPS 197 appendix C. Signed-off-by: Gilles Peskine --- tests/suites/test_suite_aes.ecb.data | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/suites/test_suite_aes.ecb.data b/tests/suites/test_suite_aes.ecb.data index b468ac30b7..d1c88ac914 100644 --- a/tests/suites/test_suite_aes.ecb.data +++ b/tests/suites/test_suite_aes.ecb.data @@ -229,5 +229,11 @@ aes_decrypt_ecb:"000000000000000000000000000000000000000000000000000000000000000 AES-256-ECB Decrypt NIST KAT #12 aes_decrypt_ecb:"0000000000000000000000000000000000000000000000000000000000000000":"9b80eefb7ebe2d2b16247aa0efc72f5d":"e0000000000000000000000000000000":0 -AES-256-ECB Copy Context NIST KAT #1 -aes_ecb_copy_context:"c1cc358b449909a19436cfbb3f852ef8bcb5ed12ac7058325f56e6099aab1a1c":"00000000000000000000000000000000" +AES-128-ECB Copy context +aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f":"00112233445566778899aabbccddeeff" + +AES-192-ECB Copy context +aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f1011121314151617":"00112233445566778899aabbccddeeff" + +AES-256-ECB Copy context +aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f":"00112233445566778899aabbccddeeff" From 5fcdf49f0e0a36342482e378f0d84cf35bc96ea3 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 14:38:29 +0100 Subject: [PATCH 41/53] Move copy-context testing to an auxiliary function This is in preparation for running it multiple times with different alignments. This commit also fixes the fact that we weren't calling mbedtls_aes_free() on the context (we still aren't if the test fails). It's not an issue except possibly in some ALT implementations. Signed-off-by: Gilles Peskine --- tests/suites/test_suite_aes.ecb.data | 6 +- tests/suites/test_suite_aes.function | 90 ++++++++++++++++++---------- 2 files changed, 63 insertions(+), 33 deletions(-) diff --git a/tests/suites/test_suite_aes.ecb.data b/tests/suites/test_suite_aes.ecb.data index d1c88ac914..93858656f5 100644 --- a/tests/suites/test_suite_aes.ecb.data +++ b/tests/suites/test_suite_aes.ecb.data @@ -230,10 +230,10 @@ AES-256-ECB Decrypt NIST KAT #12 aes_decrypt_ecb:"0000000000000000000000000000000000000000000000000000000000000000":"9b80eefb7ebe2d2b16247aa0efc72f5d":"e0000000000000000000000000000000":0 AES-128-ECB Copy context -aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f":"00112233445566778899aabbccddeeff" +aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f" AES-192-ECB Copy context -aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f1011121314151617":"00112233445566778899aabbccddeeff" +aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f1011121314151617" AES-256-ECB Copy context -aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f":"00112233445566778899aabbccddeeff" +aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function index 332b9a7948..51a4d4dbab 100644 --- a/tests/suites/test_suite_aes.function +++ b/tests/suites/test_suite_aes.function @@ -1,5 +1,61 @@ /* BEGIN_HEADER */ #include "mbedtls/aes.h" + +/* Test AES with a copied context. + * + * master, enc and dec must be AES context objects. They don't need to + * be initialized, and are left freed. + */ +static int test_copy(const data_t *key, + mbedtls_aes_context *master, + mbedtls_aes_context *enc, + mbedtls_aes_context *dec) +{ + unsigned char plaintext[16] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + }; + unsigned char ciphertext[16]; + unsigned char output[16]; + + // Set key and encrypt with original context + mbedtls_aes_init(master); + TEST_ASSERT(mbedtls_aes_setkey_enc(master, key->x, + key->len * 8) == 0); + TEST_ASSERT(mbedtls_aes_crypt_ecb(master, MBEDTLS_AES_ENCRYPT, + plaintext, ciphertext) == 0); + *enc = *master; + + // Set key for decryption with original context + mbedtls_aes_init(master); + TEST_ASSERT(mbedtls_aes_setkey_dec(master, key->x, + key->len * 8) == 0); + *dec = *master; + + // Wipe the original context to make sure nothing from it is used + memset(master, 0, sizeof(*master)); + + // Encrypt with copied context + TEST_ASSERT(mbedtls_aes_crypt_ecb(enc, MBEDTLS_AES_ENCRYPT, + plaintext, output) == 0); + ASSERT_COMPARE(ciphertext, 16, output, 16); + mbedtls_aes_free(enc); + + // Decrypt with copied context + TEST_ASSERT(mbedtls_aes_crypt_ecb(dec, MBEDTLS_AES_DECRYPT, + ciphertext, output) == 0); + ASSERT_COMPARE(plaintext, 16, output, 16); + mbedtls_aes_free(dec); + + return 1; + +exit: + /* Bug: we may be leaving something unfreed. This is harmless + * in our built-in implementations, but might cause a memory leak + * with alternative implementations. */ + return 0; +} + /* END_HEADER */ /* BEGIN_DEPENDENCIES @@ -468,38 +524,12 @@ void aes_misc_params() /* END_CASE */ /* BEGIN_CASE */ -void aes_ecb_copy_context(data_t *key, data_t *src) +void aes_ecb_copy_context(data_t *key) { - unsigned char output1[16], output2[16], plain[16]; mbedtls_aes_context ctx1, ctx2, ctx3; - - TEST_EQUAL(src->len, 16); - - // Set key and encrypt with original context - mbedtls_aes_init(&ctx1); - TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx1, key->x, - key->len * 8) == 0); - TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx1, MBEDTLS_AES_ENCRYPT, - src->x, output1) == 0); - ctx2 = ctx1; - - // Set key for decryption with original context - TEST_ASSERT(mbedtls_aes_setkey_dec(&ctx1, key->x, - key->len * 8) == 0); - ctx3 = ctx1; - - // Wipe the original context to make sure nothing from it is used - memset(&ctx1, 0, sizeof(ctx1)); - - // Encrypt with copied context - TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx2, MBEDTLS_AES_ENCRYPT, - src->x, output2) == 0); - ASSERT_COMPARE(output1, 16, output2, 16); - - // Decrypt with copied context - TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx3, MBEDTLS_AES_DECRYPT, - output1, plain) == 0); - ASSERT_COMPARE(src->x, 16, plain, 16); + if (!test_copy(key, &ctx1, &ctx2, &ctx3)) { + goto exit; + } } /* END_CASE */ From 844f65dc65513312c5313f8d4f7dcd63408e5a3f Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 14:54:48 +0100 Subject: [PATCH 42/53] Explicitly test AES contexts with different alignments Don't leave it up to chance. Signed-off-by: Gilles Peskine --- tests/suites/test_suite_aes.function | 78 +++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function index 51a4d4dbab..a535086bb6 100644 --- a/tests/suites/test_suite_aes.function +++ b/tests/suites/test_suite_aes.function @@ -526,10 +526,84 @@ void aes_misc_params() /* BEGIN_CASE */ void aes_ecb_copy_context(data_t *key) { - mbedtls_aes_context ctx1, ctx2, ctx3; - if (!test_copy(key, &ctx1, &ctx2, &ctx3)) { + /* We test context copying multiple times, with different alignments + * of the original and of the copies. */ + + void *src = NULL; // Memory block containing the original context + void *enc = NULL; // Memory block containing the copy doing encryption + void *dec = NULL; // Memory block containing the copy doing decryption + + struct align1 { + char bump; + mbedtls_aes_context ctx; + }; + + /* All peak alignment */ + ASSERT_ALLOC(src, sizeof(mbedtls_aes_context)); + ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context)); + ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context)); + if (!test_copy(key, src, enc, dec)) { goto exit; } + mbedtls_free(src); + src = NULL; + mbedtls_free(enc); + enc = NULL; + mbedtls_free(dec); + dec = NULL; + + /* Original shifted */ + ASSERT_ALLOC(src, sizeof(struct align1)); + ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context)); + ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context)); + if (!test_copy(key, &((struct align1 *) src)->ctx, enc, dec)) { + goto exit; + } + mbedtls_free(src); + src = NULL; + mbedtls_free(enc); + enc = NULL; + mbedtls_free(dec); + dec = NULL; + + /* Copies shifted */ + ASSERT_ALLOC(src, sizeof(mbedtls_aes_context)); + ASSERT_ALLOC(enc, sizeof(struct align1)); + ASSERT_ALLOC(dec, sizeof(struct align1)); + if (!test_copy(key, + src, + &((struct align1 *) enc)->ctx, + &((struct align1 *) dec)->ctx)) { + goto exit; + } + mbedtls_free(src); + src = NULL; + mbedtls_free(enc); + enc = NULL; + mbedtls_free(dec); + dec = NULL; + + /* Source and copies shifted */ + ASSERT_ALLOC(src, sizeof(struct align1)); + ASSERT_ALLOC(enc, sizeof(struct align1)); + ASSERT_ALLOC(dec, sizeof(struct align1)); + if (!test_copy(key, + &((struct align1 *) src)->ctx, + &((struct align1 *) enc)->ctx, + &((struct align1 *) dec)->ctx)) { + goto exit; + } + mbedtls_free(src); + src = NULL; + mbedtls_free(enc); + enc = NULL; + mbedtls_free(dec); + dec = NULL; + +exit: + mbedtls_free(src); + mbedtls_free(enc); + mbedtls_free(dec); } /* END_CASE */ From 0f454e4642c9309f2de66fe9f9911e42c4bad822 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 14:58:46 +0100 Subject: [PATCH 43/53] Use consistent guards for padlock code The padlock feature is enabled if ``` defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) ``` with the second macro coming from `padlock.h`. The availability of the macro `MBEDTLS_PADLOCK_ALIGN16` is coincidentally equivalent to `MBEDTLS_HAVE_X86` but this is not meaningful. Signed-off-by: Gilles Peskine --- library/aes.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/library/aes.c b/library/aes.c index 1f3d8ad91d..b4b34232d1 100644 --- a/library/aes.c +++ b/library/aes.c @@ -47,8 +47,7 @@ #if !defined(MBEDTLS_AES_ALT) -#if defined(MBEDTLS_PADLOCK_C) && \ - (defined(MBEDTLS_HAVE_X86) || defined(MBEDTLS_PADLOCK_ALIGN16)) +#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) static int aes_padlock_ace = -1; #endif @@ -530,7 +529,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key, #endif ctx->rk_offset = 0; -#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_PADLOCK_ALIGN16) +#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) if (aes_padlock_ace == -1) { aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE); } @@ -642,7 +641,7 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key, mbedtls_aes_init(&cty); ctx->rk_offset = 0; -#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_PADLOCK_ALIGN16) +#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) if (aes_padlock_ace == -1) { aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE); } From 04fa1a4054c17489332d9eaadd1c3af45f1903ab Mon Sep 17 00:00:00 2001 From: Janos Follath Date: Thu, 16 Mar 2023 15:00:03 +0000 Subject: [PATCH 44/53] Threat Model: fix copy paste Signed-off-by: Janos Follath --- SECURITY.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index 8d3678a5ee..61e39361af 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -94,10 +94,9 @@ application's threat model, they need to be mitigated by the platform. ### Physical attacks -In this section, we consider an attacker who can attacker has access to -physical information about the hardware Mbed TLS is running on and/or can alter -the physical state of the hardware (e.g. power analysis, radio emissions or -fault injection). +In this section, we consider an attacker who has access to physical information +about the hardware Mbed TLS is running on and/or can alter the physical state +of the hardware (e.g. power analysis, radio emissions or fault injection). Mbed TLS doesn't make any security guarantees against physical attacks. If physical attacks are present in a use case or a user application's threat From dd6021caf1d31ee17219813b98bd80b9998fdb29 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 16:51:40 +0100 Subject: [PATCH 45/53] Remove the dependency of MBEDTLS_AESNI_C on MBEDTLS_HAVE_ASM AESNI can now be implemented with intrinsics. Signed-off-by: Gilles Peskine --- include/mbedtls/check_config.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h index 3065df5d94..394f1e8ea4 100644 --- a/include/mbedtls/check_config.h +++ b/include/mbedtls/check_config.h @@ -66,10 +66,6 @@ #error "MBEDTLS_HAVE_TIME_DATE without MBEDTLS_HAVE_TIME does not make sense" #endif -#if defined(MBEDTLS_AESNI_C) && !defined(MBEDTLS_HAVE_ASM) -#error "MBEDTLS_AESNI_C defined, but not all prerequisites" -#endif - #if defined(MBEDTLS_AESCE_C) && !defined(MBEDTLS_HAVE_ASM) #error "MBEDTLS_AESCE_C defined, but not all prerequisites" #endif From 0de8f853f04217ba982d08bb6393bb529eeb596a Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 17:14:59 +0100 Subject: [PATCH 46/53] Clean up AES context alignment code Use a single auxiliary function to determine rk_offset, covering both setkey_enc and setkey_dec, covering both AESNI and PADLOCK. For AESNI, only build this when using the intrinsics-based implementation, since the assembly implementation supports unaligned access. Simplify "do we need to realign?" to "is the desired offset now equal to the current offset?". Signed-off-by: Gilles Peskine --- library/aes.c | 107 ++++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/library/aes.c b/library/aes.c index b4b34232d1..91b6d431e4 100644 --- a/library/aes.c +++ b/library/aes.c @@ -504,6 +504,53 @@ void mbedtls_aes_xts_free(mbedtls_aes_xts_context *ctx) } #endif /* MBEDTLS_CIPHER_MODE_XTS */ +/* Some implementations need the round keys to be aligned. + * Return an offset to be added to buf, such that (buf + offset) is + * correctly aligned. + * Note that the offset is in units of elements of buf, i.e. 32-bit words, + * i.e. an offset of 1 means 4 bytes and so on. + */ +#if (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) || \ + defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#define MAY_NEED_TO_ALIGN +#endif +static unsigned mbedtls_aes_rk_offset(uint32_t *buf) +{ +#if defined(MAY_NEED_TO_ALIGN) + int align_16_bytes = 0; + +#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) + if (aes_padlock_ace == -1) { + aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE); + } + if (aes_padlock_ace) { + align_16_bytes = 1; + } +#endif + +#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_AESNI_INTRINSICS) + if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { + align_16_bytes = 1; + } +#endif + + if (align_16_bytes) { + /* These implementations needs 16-byte alignment + * for the round key array. */ + unsigned delta = ((uintptr_t) buf & 0x0000000fU) / 4; + if (delta == 0) { + return 0; + } else { + return 4 - delta; // 16 bytes = 4 uint32_t + } + } +#else /* MAY_NEED_TO_ALIGN */ + (void) buf; +#endif /* MAY_NEED_TO_ALIGN */ + + return 0; +} + /* * AES key schedule (encryption) */ @@ -528,27 +575,11 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key, } #endif - ctx->rk_offset = 0; -#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) - if (aes_padlock_ace == -1) { - aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE); - } - - if (aes_padlock_ace) { - ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf; - } -#endif + ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf); RK = ctx->buf + ctx->rk_offset; #if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { - /* The intrinsics-based implementation needs 16-byte alignment - * for the round key array. */ - unsigned delta = (uintptr_t) ctx->buf & 0x0000000f; - if (delta != 0) { - ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t - } - RK = ctx->buf + ctx->rk_offset; return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits); } #endif @@ -640,26 +671,7 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key, mbedtls_aes_init(&cty); - ctx->rk_offset = 0; -#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) - if (aes_padlock_ace == -1) { - aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE); - } - - if (aes_padlock_ace) { - ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf; - } -#endif -#if defined(MBEDTLS_AESNI_HAVE_CODE) - if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { - /* The intrinsics-based implementation needs 16-byte alignment - * for the round key array. */ - unsigned delta = (uintptr_t) ctx->buf & 0x0000000f; - if (delta != 0) { - ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t - } - } -#endif + ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf); RK = ctx->buf + ctx->rk_offset; /* Also checks keybits */ @@ -961,8 +973,7 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx, } #endif /* !MBEDTLS_AES_DECRYPT_ALT */ -#if defined(MBEDTLS_AESNI_HAVE_CODE) || \ - (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) +#if defined(MAY_NEED_TO_ALIGN) /* VIA Padlock and our intrinsics-based implementation of AESNI require * the round keys to be aligned on a 16-byte boundary. We take care of this * before creating them, but the AES context may have moved (this can happen @@ -972,16 +983,8 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx, */ static void aes_maybe_realign(mbedtls_aes_context *ctx) { - /* We want a 16-byte alignment. Note that buf is a pointer to uint32_t - * and rk_offset is in units of uint32_t words = 4 bytes. We want a - * 4-word alignment. */ - uintptr_t current_address = (uintptr_t) (ctx->buf + ctx->rk_offset); - unsigned current_alignment = (current_address & 0x0000000f) / 4; - if (current_alignment != 0) { - unsigned new_offset = ctx->rk_offset + 4 - current_alignment; - if (new_offset >= 4) { - new_offset -= 4; - } + unsigned new_offset = mbedtls_aes_rk_offset(ctx->buf); + if (new_offset != ctx->rk_offset) { memmove(ctx->buf + new_offset, // new address ctx->buf + ctx->rk_offset, // current address (ctx->nr + 1) * 16); // number of round keys * bytes per rk @@ -1002,9 +1005,12 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, return MBEDTLS_ERR_AES_BAD_INPUT_DATA; } +#if defined(MAY_NEED_TO_ALIGN) + aes_maybe_realign(ctx); +#endif + #if defined(MBEDTLS_AESNI_HAVE_CODE) if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { - aes_maybe_realign(ctx); return mbedtls_aesni_crypt_ecb(ctx, mode, input, output); } #endif @@ -1017,7 +1023,6 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx, #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86) if (aes_padlock_ace > 0) { - aes_maybe_realign(ctx); return mbedtls_padlock_xcryptecb(ctx, mode, input, output); } #endif From 9c682e724a35cda220ee26f91f62bd8bebc24654 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 17:21:33 +0100 Subject: [PATCH 47/53] AESNI: Overhaul implementation selection Have clearly separated code to: * determine whether the assembly-based implementation is available; * determine whether the intrinsics-based implementation is available; * select one of the available implementations if any. Now MBEDTLS_AESNI_HAVE_CODE can be the single interface for aes.c and aesni.c to determine which AESNI is built. Change the implementation selection: now, if both implementations are available, always prefer assembly. Before, the intrinsics were used if available. This preference is to minimize disruption, and will likely be revised in a later minor release. Signed-off-by: Gilles Peskine --- library/aes.c | 4 ++-- library/aesni.c | 18 +++++++++--------- library/aesni.h | 35 ++++++++++++++++++++++++++--------- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/library/aes.c b/library/aes.c index 91b6d431e4..69da5828ac 100644 --- a/library/aes.c +++ b/library/aes.c @@ -511,7 +511,7 @@ void mbedtls_aes_xts_free(mbedtls_aes_xts_context *ctx) * i.e. an offset of 1 means 4 bytes and so on. */ #if (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) || \ - defined(MBEDTLS_HAVE_AESNI_INTRINSICS) + (defined(MBEDTLS_AESNI_C) && MBEDTLS_AESNI_HAVE_CODE == 2) #define MAY_NEED_TO_ALIGN #endif static unsigned mbedtls_aes_rk_offset(uint32_t *buf) @@ -528,7 +528,7 @@ static unsigned mbedtls_aes_rk_offset(uint32_t *buf) } #endif -#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#if defined(MBEDTLS_AESNI_C) && MBEDTLS_AESNI_HAVE_CODE == 2 if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) { align_16_bytes = 1; } diff --git a/library/aesni.c b/library/aesni.c index e24527c826..a23c5b595b 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -30,9 +30,9 @@ #include -#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) || defined(MBEDTLS_HAVE_X86_64) +#if defined(MBEDTLS_AESNI_HAVE_CODE) -#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#if MBEDTLS_AESNI_HAVE_CODE == 2 #if !defined(_WIN32) #include #endif @@ -48,7 +48,7 @@ int mbedtls_aesni_has_support(unsigned int what) static unsigned int c = 0; if (!done) { -#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#if MBEDTLS_AESNI_HAVE_CODE == 2 static unsigned info[4] = { 0, 0, 0, 0 }; #if defined(_MSC_VER) __cpuid(info, 1); @@ -56,20 +56,20 @@ int mbedtls_aesni_has_support(unsigned int what) __cpuid(1, info[0], info[1], info[2], info[3]); #endif c = info[2]; -#else +#else /* AESNI using asm */ asm ("movl $1, %%eax \n\t" "cpuid \n\t" : "=c" (c) : : "eax", "ebx", "edx"); -#endif +#endif /* MBEDTLS_AESNI_HAVE_CODE */ done = 1; } return (c & what) != 0; } -#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) +#if MBEDTLS_AESNI_HAVE_CODE == 2 /* * AES-NI AES-ECB block en(de)cryption @@ -388,7 +388,7 @@ static void aesni_setkey_enc_256(unsigned char *rk_bytes, aesni_set_rk_256(rk[12], rk[13], _mm_aeskeygenassist_si128(rk[13], 0x40), &rk[14], &rk[15]); } -#else /* MBEDTLS_HAVE_AESNI_INTRINSICS */ +#else /* MBEDTLS_AESNI_HAVE_CODE == 1 */ #if defined(__has_feature) #if __has_feature(memory_sanitizer) @@ -776,7 +776,7 @@ static void aesni_setkey_enc_256(unsigned char *rk, : "memory", "cc", "0"); } -#endif /* MBEDTLS_HAVE_AESNI_INTRINSICS */ +#endif /* MBEDTLS_AESNI_HAVE_CODE */ /* * Key expansion, wrapper @@ -795,6 +795,6 @@ int mbedtls_aesni_setkey_enc(unsigned char *rk, return 0; } -#endif /* MBEDTLS_HAVE_X86_64 */ +#endif /* MBEDTLS_AESNI_HAVE_CODE */ #endif /* MBEDTLS_AESNI_C */ diff --git a/library/aesni.h b/library/aesni.h index c1c4bdd8f8..a81a11725d 100644 --- a/library/aesni.h +++ b/library/aesni.h @@ -32,6 +32,9 @@ #define MBEDTLS_AESNI_AES 0x02000000u #define MBEDTLS_AESNI_CLMUL 0x00000002u +/* Can we do AESNI with inline assembly? + * (Only implemented with gas syntax, only for 64-bit.) + */ #if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) && \ (defined(__amd64__) || defined(__x86_64__)) && \ !defined(MBEDTLS_HAVE_X86_64) @@ -40,19 +43,33 @@ #if defined(MBEDTLS_AESNI_C) -#if defined(MBEDTLS_HAVE_X86_64) -#define MBEDTLS_AESNI_HAVE_CODE // via assembly -#endif - +/* Can we do AESNI with intrinsics? + * (Only implemented with certain compilers, .) + */ +#undef MBEDTLS_AESNI_HAVE_INTRINSICS #if defined(_MSC_VER) -#define MBEDTLS_HAVE_AESNI_INTRINSICS +/* Visual Studio supports AESNI intrinsics since VS 2008 SP1. We only support + * VS 2013 and up for other reasons anyway, so no need to check the version. */ +#define MBEDTLS_AESNI_HAVE_INTRINSICS #endif -#if defined(__GNUC__) && defined(__AES__) -#define MBEDTLS_HAVE_AESNI_INTRINSICS +/* GCC-like compilers: currently, we only support intrinsics if the requisite + * target flag is enabled when building the library (e.g. `gcc -mpclmul -msse2` + * or `clang -maes -mpclmul`). */ +#if defined(__GNUC__) && defined(__AES__) && defined(__PCLMUL__) +#define MBEDTLS_AESNI_HAVE_INTRINSICS #endif -#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) -#define MBEDTLS_AESNI_HAVE_CODE // via intrinsics +/* Choose the implementation of AESNI, if one is available. */ +#undef MBEDTLS_AESNI_HAVE_CODE +/* To minimize disruption when releasing the intrinsics-based implementation, + * favor the assembly-based implementation if it's available. We intend to + * revise this in a later release of Mbed TLS 3.x. In the long run, we will + * likely remove the assembly implementation. */ +#if defined(MBEDTLS_HAVE_X86_64) +#define MBEDTLS_AESNI_HAVE_CODE 1 // via assembly +#endif +#if defined(MBEDTLS_AESNI_HAVE_INTRINSICS) +#define MBEDTLS_AESNI_HAVE_CODE 2 // via intrinsics #endif #if defined(MBEDTLS_AESNI_HAVE_CODE) From 0bfccfa5379f79f48befeee2641be739b7d8c435 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 17:49:44 +0100 Subject: [PATCH 48/53] Document the new state of AESNI support Signed-off-by: Gilles Peskine --- include/mbedtls/mbedtls_config.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h index 7daba37400..62e12152b6 100644 --- a/include/mbedtls/mbedtls_config.h +++ b/include/mbedtls/mbedtls_config.h @@ -55,7 +55,7 @@ * library/padlock.h * * Required by: - * MBEDTLS_AESNI_C + * MBEDTLS_AESNI_C (on some platforms) * MBEDTLS_PADLOCK_C * * Comment to disable the use of assembly code. @@ -2018,14 +2018,32 @@ /** * \def MBEDTLS_AESNI_C * - * Enable AES-NI support on x86-64. + * Enable AES-NI support on x86-64 or x86-32. + * + * \note AESNI is only supported with certain compilers and target options: + * - Visual Studio 2013: supported. + * - GCC, x86-64, target not explicitly supporting AESNI: + * requires MBEDTLS_HAVE_ASM. + * - GCC, x86-32, target not explicitly supporting AESNI: + * not supported. + * - GCC, x86-64 or x86-32, target supporting AESNI: supported. + * For this assembly-less implementation, you must currently compile + * `library/aesni.c` and `library/aes.c` with machine options to enable + * SSE2 and AESNI instructions: `gcc -msse2 -maes -mpclmul` or + * `clang -maes -mpclmul`. + * - Non-x86 targets: this option is silently ignored. + * - Other compilers: this option is silently ignored. + * + * \note + * Above, "GCC" includes compatible compilers such as Clang. + * The limitations on target support are likely to be relaxed in the future. * * Module: library/aesni.c * Caller: library/aes.c * - * Requires: MBEDTLS_HAVE_ASM + * Requires: MBEDTLS_HAVE_ASM (on some platforms, see note) * - * This modules adds support for the AES-NI instructions on x86-64 + * This modules adds support for the AES-NI instructions on x86. */ #define MBEDTLS_AESNI_C From 74b4223c81ac9256d531d5b688a7d5e8de5040b4 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 17:50:15 +0100 Subject: [PATCH 49/53] Announce the expanded AESNI support Signed-off-by: Gilles Peskine --- ChangeLog.d/aesni.txt | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 ChangeLog.d/aesni.txt diff --git a/ChangeLog.d/aesni.txt b/ChangeLog.d/aesni.txt new file mode 100644 index 0000000000..2d90a6e1cc --- /dev/null +++ b/ChangeLog.d/aesni.txt @@ -0,0 +1,7 @@ +Features + * AES-NI is now supported with Visual Studio. + * AES-NI is now supported in 32-bit builds, or when MBEDTLS_HAVE_ASM + is disabled, when compiling with GCC or Clang or a compatible compiler + for a target CPU that supports the requisite instructions (for example + gcc -m32 -msse2 -maes -mpclmul). (Generic x86 builds with GCC-like + compilers still require MBEDTLS_HAVE_ASM and a 64-bit target.) From 646ee7ec2ee56b0b0b4de3e7c198fcb61d7e53bd Mon Sep 17 00:00:00 2001 From: Paul Elliott Date: Thu, 16 Mar 2023 17:10:34 +0000 Subject: [PATCH 50/53] Fix CI build after repo merge conflict After merging the driver only ECDSA work, a conflict arose between that and the previous work re-ordering the ciphersuite preference list. We can remove the breaking requirement on this test, as this requirement is now auto-detected when the server5 crt is used in the server's command line. Signed-off-by: Paul Elliott --- tests/ssl-opt.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh index 7c165f3108..4fb39f31ef 100755 --- a/tests/ssl-opt.sh +++ b/tests/ssl-opt.sh @@ -11478,7 +11478,6 @@ requires_config_enabled MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED requires_ciphersuite_enabled TLS1-3-CHACHA20-POLY1305-SHA256 requires_config_enabled MBEDTLS_ECP_DP_CURVE25519_ENABLED requires_config_enabled MBEDTLS_ECP_DP_SECP256R1_ENABLED -requires_config_enabled MBEDTLS_ECDSA_C run_test "TLS 1.3: Default" \ "$P_SRV allow_sha1=0 debug_level=3 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13" \ "$P_CLI allow_sha1=0" \ From 28e4dc1e398f57230bba45f0f9451f24bf3b4016 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 16 Mar 2023 21:39:47 +0100 Subject: [PATCH 51/53] Fix use of arithmetic on void* Signed-off-by: Gilles Peskine --- tests/suites/test_suite_aes.function | 107 ++++++++++++++------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function index a535086bb6..363a5fd27c 100644 --- a/tests/suites/test_suite_aes.function +++ b/tests/suites/test_suite_aes.function @@ -529,81 +529,84 @@ void aes_ecb_copy_context(data_t *key) /* We test context copying multiple times, with different alignments * of the original and of the copies. */ - void *src = NULL; // Memory block containing the original context - void *enc = NULL; // Memory block containing the copy doing encryption - void *dec = NULL; // Memory block containing the copy doing decryption + struct align0 { + mbedtls_aes_context ctx; + }; + struct align0 *src0 = NULL; + struct align0 *enc0 = NULL; + struct align0 *dec0 = NULL; struct align1 { char bump; mbedtls_aes_context ctx; }; + struct align1 *src1 = NULL; + struct align1 *enc1 = NULL; + struct align1 *dec1 = NULL; /* All peak alignment */ - ASSERT_ALLOC(src, sizeof(mbedtls_aes_context)); - ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context)); - ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context)); - if (!test_copy(key, src, enc, dec)) { + ASSERT_ALLOC(src0, 1); + ASSERT_ALLOC(enc0, 1); + ASSERT_ALLOC(dec0, 1); + if (!test_copy(key, &src0->ctx, &enc0->ctx, &dec0->ctx)) { goto exit; } - mbedtls_free(src); - src = NULL; - mbedtls_free(enc); - enc = NULL; - mbedtls_free(dec); - dec = NULL; + mbedtls_free(src0); + src0 = NULL; + mbedtls_free(enc0); + enc0 = NULL; + mbedtls_free(dec0); + dec0 = NULL; /* Original shifted */ - ASSERT_ALLOC(src, sizeof(struct align1)); - ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context)); - ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context)); - if (!test_copy(key, &((struct align1 *) src)->ctx, enc, dec)) { + ASSERT_ALLOC(src1, 1); + ASSERT_ALLOC(enc0, 1); + ASSERT_ALLOC(dec0, 1); + if (!test_copy(key, &src1->ctx, &enc0->ctx, &dec0->ctx)) { goto exit; } - mbedtls_free(src); - src = NULL; - mbedtls_free(enc); - enc = NULL; - mbedtls_free(dec); - dec = NULL; + mbedtls_free(src1); + src1 = NULL; + mbedtls_free(enc0); + enc0 = NULL; + mbedtls_free(dec0); + dec0 = NULL; /* Copies shifted */ - ASSERT_ALLOC(src, sizeof(mbedtls_aes_context)); - ASSERT_ALLOC(enc, sizeof(struct align1)); - ASSERT_ALLOC(dec, sizeof(struct align1)); - if (!test_copy(key, - src, - &((struct align1 *) enc)->ctx, - &((struct align1 *) dec)->ctx)) { + ASSERT_ALLOC(src0, 1); + ASSERT_ALLOC(enc1, 1); + ASSERT_ALLOC(dec1, 1); + if (!test_copy(key, &src0->ctx, &enc1->ctx, &dec1->ctx)) { goto exit; } - mbedtls_free(src); - src = NULL; - mbedtls_free(enc); - enc = NULL; - mbedtls_free(dec); - dec = NULL; + mbedtls_free(src0); + src0 = NULL; + mbedtls_free(enc1); + enc1 = NULL; + mbedtls_free(dec1); + dec1 = NULL; /* Source and copies shifted */ - ASSERT_ALLOC(src, sizeof(struct align1)); - ASSERT_ALLOC(enc, sizeof(struct align1)); - ASSERT_ALLOC(dec, sizeof(struct align1)); - if (!test_copy(key, - &((struct align1 *) src)->ctx, - &((struct align1 *) enc)->ctx, - &((struct align1 *) dec)->ctx)) { + ASSERT_ALLOC(src1, 1); + ASSERT_ALLOC(enc1, 1); + ASSERT_ALLOC(dec1, 1); + if (!test_copy(key, &src1->ctx, &enc1->ctx, &dec1->ctx)) { goto exit; } - mbedtls_free(src); - src = NULL; - mbedtls_free(enc); - enc = NULL; - mbedtls_free(dec); - dec = NULL; + mbedtls_free(src1); + src1 = NULL; + mbedtls_free(enc1); + enc1 = NULL; + mbedtls_free(dec1); + dec1 = NULL; exit: - mbedtls_free(src); - mbedtls_free(enc); - mbedtls_free(dec); + mbedtls_free(src0); + mbedtls_free(enc0); + mbedtls_free(dec0); + mbedtls_free(src1); + mbedtls_free(enc1); + mbedtls_free(dec1); } /* END_CASE */ From 30e9f2a293287304f3a314c8cce6725466269e4e Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 17 Mar 2023 17:29:58 +0100 Subject: [PATCH 52/53] Finish sentence in comment Signed-off-by: Gilles Peskine --- library/aesni.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/aesni.h b/library/aesni.h index a81a11725d..2493998c67 100644 --- a/library/aesni.h +++ b/library/aesni.h @@ -44,7 +44,7 @@ #if defined(MBEDTLS_AESNI_C) /* Can we do AESNI with intrinsics? - * (Only implemented with certain compilers, .) + * (Only implemented with certain compilers, only for certain targets.) */ #undef MBEDTLS_AESNI_HAVE_INTRINSICS #if defined(_MSC_VER) From 36b9e47eed1fdf4be05d516b1311af4bc0566169 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 17 Mar 2023 17:30:29 +0100 Subject: [PATCH 53/53] Fix preprocessor conditional This was intended as an if-else-if chain. Make it so. Signed-off-by: Gilles Peskine --- library/aesni.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/library/aesni.h b/library/aesni.h index 2493998c67..51b770f316 100644 --- a/library/aesni.h +++ b/library/aesni.h @@ -67,8 +67,7 @@ * likely remove the assembly implementation. */ #if defined(MBEDTLS_HAVE_X86_64) #define MBEDTLS_AESNI_HAVE_CODE 1 // via assembly -#endif -#if defined(MBEDTLS_AESNI_HAVE_INTRINSICS) +#elif defined(MBEDTLS_AESNI_HAVE_INTRINSICS) #define MBEDTLS_AESNI_HAVE_CODE 2 // via intrinsics #endif