From 606671e06e91fd660c02db4878cbf8799551ed09 Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Fri, 17 Feb 2023 11:36:33 +0100
Subject: [PATCH 01/53] tls13: server: Check mbedtls_ssl_set_hs_psk returned
 value

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 library/ssl_tls13_server.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c
index 81c289aee5..b91cde6375 100644
--- a/library/ssl_tls13_server.c
+++ b/library/ssl_tls13_server.c
@@ -258,6 +258,8 @@ static int ssl_tls13_offered_psks_check_identity_match(
     int *psk_type,
     mbedtls_ssl_session *session)
 {
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+
     ((void) session);
     ((void) obfuscated_ticket_age);
     *psk_type = MBEDTLS_SSL_TLS1_3_PSK_EXTERNAL;
@@ -271,9 +273,13 @@ static int ssl_tls13_offered_psks_check_identity_match(
             session) == SSL_TLS1_3_OFFERED_PSK_MATCH) {
         ssl->handshake->resume = 1;
         *psk_type = MBEDTLS_SSL_TLS1_3_PSK_RESUMPTION;
-        mbedtls_ssl_set_hs_psk(ssl,
-                               session->resumption_key,
-                               session->resumption_key_len);
+        ret = mbedtls_ssl_set_hs_psk(ssl,
+                                     session->resumption_key,
+                                     session->resumption_key_len);
+        if (ret != 0) {
+            MBEDTLS_SSL_DEBUG_RET(1, "mbedtls_ssl_set_hs_psk", ret);
+            return ret;
+        }
 
         MBEDTLS_SSL_DEBUG_BUF(4, "Ticket-resumed PSK:",
                               session->resumption_key,
@@ -299,7 +305,11 @@ static int ssl_tls13_offered_psks_check_identity_match(
         identity_len == ssl->conf->psk_identity_len &&
         mbedtls_ct_memcmp(ssl->conf->psk_identity,
                           identity, identity_len) == 0) {
-        mbedtls_ssl_set_hs_psk(ssl, ssl->conf->psk, ssl->conf->psk_len);
+        ret = mbedtls_ssl_set_hs_psk(ssl, ssl->conf->psk, ssl->conf->psk_len);
+        if (ret != 0) {
+            MBEDTLS_SSL_DEBUG_RET(1, "mbedtls_ssl_set_hs_psk", ret);
+            return ret;
+        }
         return SSL_TLS1_3_OFFERED_PSK_MATCH;
     }
 

From fc7ae87ad49308024dd76519efe2b54ff0904a90 Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Thu, 16 Feb 2023 15:32:19 +0100
Subject: [PATCH 02/53] tls13: server: Check ciphersuite list length parity
 once

Check ciphersuite list length parity once,
mainly to enable the possibility of getting
out of the loop of the ciphersuites whenever
we want.

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 library/ssl_tls13_server.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c
index b91cde6375..f557d7f40e 100644
--- a/library/ssl_tls13_server.c
+++ b/library/ssl_tls13_server.c
@@ -1333,6 +1333,15 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl,
     cipher_suites_len = MBEDTLS_GET_UINT16_BE(p, 0);
     p += 2;
 
+    /*
+     * The length of the ciphersuite list has to be even.
+     */
+    if (cipher_suites_len & 1) {
+        MBEDTLS_SSL_PEND_FATAL_ALERT(MBEDTLS_SSL_ALERT_MSG_DECODE_ERROR,
+                                     MBEDTLS_ERR_SSL_DECODE_ERROR);
+        return MBEDTLS_ERR_SSL_DECODE_ERROR;
+    }
+
     /* Check we have enough data for the ciphersuite list, the legacy
      * compression methods and the length of the extensions.
      *
@@ -1362,8 +1371,6 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl,
         uint16_t cipher_suite;
         const mbedtls_ssl_ciphersuite_t *ciphersuite_info;
 
-        MBEDTLS_SSL_CHK_BUF_READ_PTR(p, cipher_suites_end, 2);
-
         cipher_suite = MBEDTLS_GET_UINT16_BE(p, 0);
         ciphersuite_info = ssl_tls13_validate_peer_ciphersuite(
             ssl, cipher_suite);

From 25e9ec61f07cd1d6fa8f483e03dbde8787f4f235 Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Thu, 16 Feb 2023 15:35:16 +0100
Subject: [PATCH 03/53] tls13: server: Select preferred cipher suite

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 library/ssl_tls13_server.c | 2 ++
 tests/ssl-opt.sh           | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c
index f557d7f40e..8aff191115 100644
--- a/library/ssl_tls13_server.c
+++ b/library/ssl_tls13_server.c
@@ -1383,6 +1383,7 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl,
         MBEDTLS_SSL_DEBUG_MSG(2, ("selected ciphersuite: %04x - %s",
                                   cipher_suite,
                                   ciphersuite_info->name));
+        break;
     }
 
     if (handshake->ciphersuite_info == NULL) {
@@ -1390,6 +1391,7 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl,
                                      MBEDTLS_ERR_SSL_HANDSHAKE_FAILURE);
         return MBEDTLS_ERR_SSL_HANDSHAKE_FAILURE;
     }
+    p = cipher_suites_end;
 
     /* ...
      * opaque legacy_compression_methods<1..2^8-1>;
diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index c176d0d628..b15fe16f7d 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -13222,7 +13222,7 @@ requires_config_enabled MBEDTLS_DEBUG_C
 requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
-run_test    "TLS 1.3: NewSessionTicket: Basic check, G->m" \
+run_test    "TLS 1.3: NewSessionTicket: resumption failure, PSK len too big, G->m" \
             "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \
             "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \
             0 \
@@ -13231,9 +13231,9 @@ run_test    "TLS 1.3: NewSessionTicket: Basic check, G->m" \
             -s "=> write NewSessionTicket msg" \
             -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET" \
             -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET_FLUSH" \
+            -s "mbedtls_ssl_set_hs_psk() returned" \
             -s "key exchange mode: ephemeral" \
-            -s "key exchange mode: psk_ephemeral" \
-            -s "found pre_shared_key extension"
+            -S "key exchange mode: psk_ephemeral"
 
 requires_config_enabled MBEDTLS_SSL_SESSION_TICKETS
 requires_config_enabled MBEDTLS_SSL_SRV_C

From 0a1c50415636405ab17a4b5a310d83d7fba0a64f Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Mon, 20 Feb 2023 10:44:22 +0100
Subject: [PATCH 04/53] tls13: Fix session resumption with 384 bits PSKs

MBEDTLS_PSK_MAX_LEN main purpose is to determine
a miximum size for the TLS 1.2 pre-master secret.

This is not relevant to TLS 1.3 thus disable in
TLS 1.3 case the check against MBEDTLS_PSK_MAX_LEN
when setting during the handshake the PSK through
mbedtls_ssl_set_hs_psk(). This fixes the session
resumption with 384 bits PSKs when MBEDTLS_PSK_MAX_LEN
is smaller than that.

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 library/ssl_tls.c |  5 ++++-
 tests/ssl-opt.sh  | 28 +++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/library/ssl_tls.c b/library/ssl_tls.c
index 86f5c0b555..4312f154af 100644
--- a/library/ssl_tls.c
+++ b/library/ssl_tls.c
@@ -2145,9 +2145,12 @@ int mbedtls_ssl_set_hs_psk(mbedtls_ssl_context *ssl,
         return MBEDTLS_ERR_SSL_BAD_INPUT_DATA;
     }
 
-    if (psk_len > MBEDTLS_PSK_MAX_LEN) {
+#if defined(MBEDTLS_SSL_PROTO_TLS1_2)
+    if (ssl->tls_version == MBEDTLS_SSL_VERSION_TLS1_2 &&
+        psk_len > MBEDTLS_PSK_MAX_LEN) {
         return MBEDTLS_ERR_SSL_BAD_INPUT_DATA;
     }
+#endif /* MBEDTLS_SSL_PROTO_TLS1_2 */
 
     ssl_remove_psk(ssl);
 
diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index b15fe16f7d..3f8b203a70 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -13222,7 +13222,7 @@ requires_config_enabled MBEDTLS_DEBUG_C
 requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
-run_test    "TLS 1.3: NewSessionTicket: resumption failure, PSK len too big, G->m" \
+run_test    "TLS 1.3: NewSessionTicket: Basic check" \
             "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \
             "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \
             0 \
@@ -13231,9 +13231,31 @@ run_test    "TLS 1.3: NewSessionTicket: resumption failure, PSK len too big, G->
             -s "=> write NewSessionTicket msg" \
             -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET" \
             -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET_FLUSH" \
-            -s "mbedtls_ssl_set_hs_psk() returned" \
             -s "key exchange mode: ephemeral" \
-            -S "key exchange mode: psk_ephemeral"
+            -s "key exchange mode: psk_ephemeral" \
+            -s "found pre_shared_key extension"
+
+requires_gnutls_tls1_3
+requires_config_enabled MBEDTLS_SSL_SESSION_TICKETS
+requires_config_enabled MBEDTLS_SSL_SRV_C
+requires_config_enabled MBEDTLS_DEBUG_C
+requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
+                             MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \
+                             MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
+requires_ciphersuite_enabled TLS1-3-AES-256-GCM-SHA384
+run_test    "TLS 1.3: NewSessionTicket: Basic check with AES-256-GCM only, G->m" \
+            "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \
+            "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3:-CIPHER-ALL:+AES-256-GCM -V -r" \
+            0 \
+            -c "Connecting again- trying to resume previous session" \
+            -c "NEW SESSION TICKET (4) was received" \
+            -s "Ciphersuite is TLS1-3-AES-256-GCM-SHA384" \
+            -s "=> write NewSessionTicket msg" \
+            -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET" \
+            -s "server state: MBEDTLS_SSL_TLS1_3_NEW_SESSION_TICKET_FLUSH" \
+            -s "key exchange mode: ephemeral" \
+            -s "key exchange mode: psk_ephemeral" \
+            -s "found pre_shared_key extension"
 
 requires_config_enabled MBEDTLS_SSL_SESSION_TICKETS
 requires_config_enabled MBEDTLS_SSL_SRV_C

From b18c67af5fc7c47f6251c6ea0b64fcec12109ee7 Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Thu, 16 Feb 2023 16:57:16 +0100
Subject: [PATCH 05/53] tls13: ssl-opt.sh: Add test of default crypto algo

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 tests/ssl-opt.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index 3f8b203a70..68641385ac 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -11468,6 +11468,21 @@ run_test    "TLS 1.3: Test gnutls tls1_3 feature" \
             -c "Version: TLS1.3"
 
 # TLS1.3 test cases
+requires_config_enabled MBEDTLS_SSL_PROTO_TLS1_3
+requires_config_enabled MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED
+requires_ciphersuite_enabled TLS1-3-AES-128-GCM-SHA256
+requires_config_enabled MBEDTLS_ECP_DP_CURVE25519_ENABLED
+requires_config_enabled MBEDTLS_ECP_DP_SECP256R1_ENABLED
+requires_config_enabled MBEDTLS_ECDSA_C
+run_test    "TLS 1.3: Default" \
+            "$P_SRV allow_sha1=0 debug_level=3 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13" \
+            "$P_CLI allow_sha1=0" \
+            0 \
+            -s "Protocol is TLSv1.3" \
+            -s "Ciphersuite is TLS1-3-AES-128-GCM-SHA256" \
+            -s "ECDH group: x25519" \
+            -s "selected signature algorithm ecdsa_secp256r1_sha256"
+
 requires_openssl_tls1_3
 requires_config_enabled MBEDTLS_DEBUG_C
 requires_config_enabled MBEDTLS_SSL_CLI_C

From 4bb67736400540f3f2afb38e2abfae6cfd02018a Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Thu, 16 Feb 2023 15:51:18 +0100
Subject: [PATCH 06/53] tls13: Apply same preference rules for ciphersuites as
 for TLS 1.2

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 library/ssl_ciphersuites.c |  4 ++--
 tests/ssl-opt.sh           | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/library/ssl_ciphersuites.c b/library/ssl_ciphersuites.c
index 33789c4633..501608a5a8 100644
--- a/library/ssl_ciphersuites.c
+++ b/library/ssl_ciphersuites.c
@@ -52,9 +52,9 @@ static const int ciphersuite_preference[] =
 #else
 #if defined(MBEDTLS_SSL_PROTO_TLS1_3)
     /* TLS 1.3 ciphersuites */
-    MBEDTLS_TLS1_3_AES_128_GCM_SHA256,
-    MBEDTLS_TLS1_3_AES_256_GCM_SHA384,
     MBEDTLS_TLS1_3_CHACHA20_POLY1305_SHA256,
+    MBEDTLS_TLS1_3_AES_256_GCM_SHA384,
+    MBEDTLS_TLS1_3_AES_128_GCM_SHA256,
     MBEDTLS_TLS1_3_AES_128_CCM_SHA256,
     MBEDTLS_TLS1_3_AES_128_CCM_8_SHA256,
 #endif /* MBEDTLS_SSL_PROTO_TLS1_3 */
diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index 68641385ac..3b493ee391 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -11470,7 +11470,7 @@ run_test    "TLS 1.3: Test gnutls tls1_3 feature" \
 # TLS1.3 test cases
 requires_config_enabled MBEDTLS_SSL_PROTO_TLS1_3
 requires_config_enabled MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED
-requires_ciphersuite_enabled TLS1-3-AES-128-GCM-SHA256
+requires_ciphersuite_enabled TLS1-3-CHACHA20-POLY1305-SHA256
 requires_config_enabled MBEDTLS_ECP_DP_CURVE25519_ENABLED
 requires_config_enabled MBEDTLS_ECP_DP_SECP256R1_ENABLED
 requires_config_enabled MBEDTLS_ECDSA_C
@@ -11479,7 +11479,7 @@ run_test    "TLS 1.3: Default" \
             "$P_CLI allow_sha1=0" \
             0 \
             -s "Protocol is TLSv1.3" \
-            -s "Ciphersuite is TLS1-3-AES-128-GCM-SHA256" \
+            -s "Ciphersuite is TLS1-3-CHACHA20-POLY1305-SHA256" \
             -s "ECDH group: x25519" \
             -s "selected signature algorithm ecdsa_secp256r1_sha256"
 
@@ -11503,7 +11503,7 @@ run_test    "TLS 1.3: minimal feature sets - openssl" \
             -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \
             -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \
             -c "<= ssl_tls13_process_server_hello" \
-            -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \
+            -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \
             -c "ECDH curve: x25519" \
             -c "=> ssl_tls13_process_server_hello" \
             -c "<= parse encrypted extensions" \
@@ -11537,7 +11537,7 @@ run_test    "TLS 1.3: minimal feature sets - gnutls" \
             -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \
             -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \
             -c "<= ssl_tls13_process_server_hello" \
-            -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \
+            -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \
             -c "ECDH curve: x25519" \
             -c "=> ssl_tls13_process_server_hello" \
             -c "<= parse encrypted extensions" \
@@ -11570,7 +11570,7 @@ run_test    "TLS 1.3: alpn - openssl" \
             -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \
             -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \
             -c "<= ssl_tls13_process_server_hello" \
-            -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \
+            -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \
             -c "ECDH curve: x25519" \
             -c "=> ssl_tls13_process_server_hello" \
             -c "<= parse encrypted extensions" \
@@ -11606,7 +11606,7 @@ run_test    "TLS 1.3: alpn - gnutls" \
             -c "client state: MBEDTLS_SSL_FLUSH_BUFFERS" \
             -c "client state: MBEDTLS_SSL_HANDSHAKE_WRAPUP" \
             -c "<= ssl_tls13_process_server_hello" \
-            -c "server hello, chosen ciphersuite: ( 1301 ) - TLS1-3-AES-128-GCM-SHA256" \
+            -c "server hello, chosen ciphersuite: ( 1303 ) - TLS1-3-CHACHA20-POLY1305-SHA256" \
             -c "ECDH curve: x25519" \
             -c "=> ssl_tls13_process_server_hello" \
             -c "<= parse encrypted extensions" \

From 675d97d42e16fa4e55d80d76596b36c56730589b Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Fri, 17 Feb 2023 15:49:03 +0100
Subject: [PATCH 07/53] Add change log

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 .../tls13-reorder-ciphersuite-preference-list.txt     | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt

diff --git a/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt
new file mode 100644
index 0000000000..948bc882a4
--- /dev/null
+++ b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt
@@ -0,0 +1,11 @@
+Default behavior changes
+   * The default priority order of TLS 1.3 cipher suites has been modified to
+     follow the same rules as the TLS 1.2 cipher suites (see
+     ssl_ciphersuites.c).
+
+Bugfix
+   * In the TLS 1.3 server, select the prefered client cipher suite, not the
+     least prefered. The selection error was introduced in Mbed TLS 3.3.0.
+   * Fix TLS 1.3 session resumption when the established pre-shared key is
+     384 bits long. That is the length of pre-shared keys created under a
+     session where the cipher suite is TLS_AES_256_GCM_SHA384.

From d89360b87bb5bcf09a21c74a799056c49260b260 Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Tue, 21 Feb 2023 08:53:33 +0100
Subject: [PATCH 08/53] Fix and improve documentation, comments and logs

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt | 7 ++++---
 library/ssl_tls13_server.c                                | 5 +++++
 tests/ssl-opt.sh                                          | 5 ++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt
index 948bc882a4..1d3406854d 100644
--- a/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt
+++ b/ChangeLog.d/tls13-reorder-ciphersuite-preference-list.txt
@@ -1,11 +1,12 @@
 Default behavior changes
    * The default priority order of TLS 1.3 cipher suites has been modified to
      follow the same rules as the TLS 1.2 cipher suites (see
-     ssl_ciphersuites.c).
+     ssl_ciphersuites.c). The preferred cipher suite is now
+     TLS_CHACHA20_POLY1305_SHA256.
 
 Bugfix
-   * In the TLS 1.3 server, select the prefered client cipher suite, not the
-     least prefered. The selection error was introduced in Mbed TLS 3.3.0.
+   * In the TLS 1.3 server, select the preferred client cipher suite, not the
+     least preferred. The selection error was introduced in Mbed TLS 3.3.0.
    * Fix TLS 1.3 session resumption when the established pre-shared key is
      384 bits long. That is the length of pre-shared keys created under a
      session where the cipher suite is TLS_AES_256_GCM_SHA384.
diff --git a/library/ssl_tls13_server.c b/library/ssl_tls13_server.c
index 8aff191115..005a1d7999 100644
--- a/library/ssl_tls13_server.c
+++ b/library/ssl_tls13_server.c
@@ -1371,6 +1371,11 @@ static int ssl_tls13_parse_client_hello(mbedtls_ssl_context *ssl,
         uint16_t cipher_suite;
         const mbedtls_ssl_ciphersuite_t *ciphersuite_info;
 
+        /*
+         * "cipher_suite_end - p is even" is an invariant of the loop. As
+         * cipher_suites_end - p > 0, we have cipher_suites_end - p >= 2 and
+         * it is thus safe to read two bytes.
+         */
         cipher_suite = MBEDTLS_GET_UINT16_BE(p, 0);
         ciphersuite_info = ssl_tls13_validate_peer_ciphersuite(
             ssl, cipher_suite);
diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index 3b493ee391..b1ee654938 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -13237,7 +13237,7 @@ requires_config_enabled MBEDTLS_DEBUG_C
 requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
-run_test    "TLS 1.3: NewSessionTicket: Basic check" \
+run_test    "TLS 1.3: NewSessionTicket: Basic check, G->m" \
             "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \
             "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \
             0 \
@@ -13257,6 +13257,9 @@ requires_config_enabled MBEDTLS_DEBUG_C
 requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
+# Test the session resumption when the cipher suite for the original session is
+# TLS1-3-AES-256-GCM-SHA384. In that case, the PSK is 384 bits long and not
+# 256 bits long as with all the other TLS 1.3 cipher suites.
 requires_ciphersuite_enabled TLS1-3-AES-256-GCM-SHA384
 run_test    "TLS 1.3: NewSessionTicket: Basic check with AES-256-GCM only, G->m" \
             "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \

From 1aa6e8d6e9f5af25c24a2484d774f9a879aa873a Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Thu, 23 Feb 2023 09:46:54 +0100
Subject: [PATCH 09/53] Restore same PSK length enforcement

Restore same PSK length enforcement in
conf_psk and set_hs_psk, whether the
negotiated protocol is TLS 1.2 or TLS 1.3.

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 include/mbedtls/mbedtls_config.h |  2 +-
 include/mbedtls/ssl.h            | 16 +++++++++++++++-
 library/ssl_tls.c                |  5 +----
 tests/ssl-opt.sh                 |  2 +-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 9ae51c964a..4c676c520d 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -3761,7 +3761,7 @@
  */
 //#define MBEDTLS_SSL_DTLS_MAX_BUFFERING             32768
 
-//#define MBEDTLS_PSK_MAX_LEN               32 /**< Max size of TLS pre-shared keys, in bytes (default 256 bits) */
+//#define MBEDTLS_PSK_MAX_LEN               32 /**< Max size of TLS pre-shared keys, in bytes (default 256 or 384 bits) */
 //#define MBEDTLS_SSL_COOKIE_TIMEOUT        60 /**< Default expiration delay of DTLS cookies, in seconds if HAVE_TIME, or in number of cookies issued */
 
 /**
diff --git a/include/mbedtls/ssl.h b/include/mbedtls/ssl.h
index 4b954bb458..0df142d685 100644
--- a/include/mbedtls/ssl.h
+++ b/include/mbedtls/ssl.h
@@ -599,8 +599,22 @@
  * Size defines
  */
 #if !defined(MBEDTLS_PSK_MAX_LEN)
-#define MBEDTLS_PSK_MAX_LEN            32 /* 256 bits */
+/*
+ * If the library supports TLS 1.3 tickets and the cipher suite
+ * TLS1-3-AES-256-GCM-SHA384, set the PSK maximum length to 48 instead of 32.
+ * That way, the TLS 1.3 client and server are able to resume sessions where
+ * the cipher suite was TLS1-3-AES-256-GCM-SHA384 (pre-shared keys are 48
+ * bytes long is that case).
+ */
+#if defined(MBEDTLS_SSL_PROTO_TLS1_3) && \
+    defined(MBEDTLS_SSL_SESSION_TICKETS) && \
+    defined(MBEDTLS_AES_C) && defined(MBEDTLS_GCM_C) && \
+    defined(MBEDTLS_HAS_ALG_SHA_384_VIA_MD_OR_PSA_BASED_ON_USE_PSA)
+#define MBEDTLS_PSK_MAX_LEN 48 /* 384 bits */
+#else
+#define MBEDTLS_PSK_MAX_LEN 32 /* 256 bits */
 #endif
+#endif /* !MBEDTLS_PSK_MAX_LEN */
 
 /* Dummy type used only for its size */
 union mbedtls_ssl_premaster_secret {
diff --git a/library/ssl_tls.c b/library/ssl_tls.c
index 4312f154af..86f5c0b555 100644
--- a/library/ssl_tls.c
+++ b/library/ssl_tls.c
@@ -2145,12 +2145,9 @@ int mbedtls_ssl_set_hs_psk(mbedtls_ssl_context *ssl,
         return MBEDTLS_ERR_SSL_BAD_INPUT_DATA;
     }
 
-#if defined(MBEDTLS_SSL_PROTO_TLS1_2)
-    if (ssl->tls_version == MBEDTLS_SSL_VERSION_TLS1_2 &&
-        psk_len > MBEDTLS_PSK_MAX_LEN) {
+    if (psk_len > MBEDTLS_PSK_MAX_LEN) {
         return MBEDTLS_ERR_SSL_BAD_INPUT_DATA;
     }
-#endif /* MBEDTLS_SSL_PROTO_TLS1_2 */
 
     ssl_remove_psk(ssl);
 
diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index b1ee654938..6794068731 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -13239,7 +13239,7 @@ requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
 run_test    "TLS 1.3: NewSessionTicket: Basic check, G->m" \
             "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \
-            "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \
+            "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3:-AES-256-GCM -V -r" \
             0 \
             -c "Connecting again- trying to resume previous session" \
             -c "NEW SESSION TICKET (4) was received" \

From ee54de02b196145295037142dee589476d47f94c Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Fri, 24 Feb 2023 12:06:30 +0100
Subject: [PATCH 10/53] Fix comments

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 include/mbedtls/ssl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mbedtls/ssl.h b/include/mbedtls/ssl.h
index 0df142d685..7d9643ee48 100644
--- a/include/mbedtls/ssl.h
+++ b/include/mbedtls/ssl.h
@@ -603,8 +603,8 @@
  * If the library supports TLS 1.3 tickets and the cipher suite
  * TLS1-3-AES-256-GCM-SHA384, set the PSK maximum length to 48 instead of 32.
  * That way, the TLS 1.3 client and server are able to resume sessions where
- * the cipher suite was TLS1-3-AES-256-GCM-SHA384 (pre-shared keys are 48
- * bytes long is that case).
+ * the cipher suite is TLS1-3-AES-256-GCM-SHA384 (pre-shared keys are 48
+ * bytes long in that case).
  */
 #if defined(MBEDTLS_SSL_PROTO_TLS1_3) && \
     defined(MBEDTLS_SSL_SESSION_TICKETS) && \

From 7dc413021006df09e99f2cf6804ef46139cddd36 Mon Sep 17 00:00:00 2001
From: Ronald Cron <ronald.cron@arm.com>
Date: Fri, 24 Feb 2023 12:10:09 +0100
Subject: [PATCH 11/53] Improve GnuTLS client priority for resumption basic
 check

Signed-off-by: Ronald Cron <ronald.cron@arm.com>
---
 tests/ssl-opt.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index 6794068731..b1ee654938 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -13239,7 +13239,7 @@ requires_all_configs_enabled MBEDTLS_SSL_TLS1_3_COMPATIBILITY_MODE \
                              MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_PSK_EPHEMERAL_ENABLED
 run_test    "TLS 1.3: NewSessionTicket: Basic check, G->m" \
             "$P_SRV debug_level=4 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13 tickets=4" \
-            "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3:-AES-256-GCM -V -r" \
+            "$G_NEXT_CLI localhost -d 4 --priority=NORMAL:-VERS-ALL:+VERS-TLS1.3 -V -r" \
             0 \
             -c "Connecting again- trying to resume previous session" \
             -c "NEW SESSION TICKET (4) was received" \

From 18d417340f2ed2818b4b673b1040933ca8d97737 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Fri, 24 Feb 2023 16:00:21 +0000
Subject: [PATCH 12/53] Add Threat Model Summary

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/SECURITY.md b/SECURITY.md
index 33bbc2ff30..ae37dab778 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -18,3 +18,63 @@ goes public.
 Only the maintained branches, as listed in [`BRANCHES.md`](BRANCHES.md),
 get security fixes.
 Users are urged to always use the latest version of a maintained branch.
+
+## Threat model
+
+We use the following classification of attacks:
+
+- **Remote Attacks:** The attacker can observe and modify data sent over the
+  network. This includes observing timing of individual packets and potentially
+  delaying legitimate messages.
+- **Timing Attacks:** The attacker can gain information about the time certain
+  sets of instructions in Mbed TLS operations take.
+- **Physical Attacks:** The attacker has access to physical information about
+  the hardware Mbed TLS is running on and/or can alter the physical state of
+  the hardware.
+
+### Remote attacks
+
+Mbed TLS aims to fully protect against remote attacks. Mbed Crypto aims to
+enable the user application in providing full protection against remote
+attacks. Said protection is limited to providing security guarantees offered by
+the protocol in question. (For example Mbed TLS alone won't guarantee that the
+messages will arrive without delay, as the TLS protocol doesn't guarantee that
+either.)
+
+### Timing attacks
+
+Mbed TLS and Mbed Crypto provide limited protection against timing attacks. The
+cost of protecting against timing attacks widely varies depending on the
+granularity of the measurements and the noise present. Therefore the protection
+in Mbed TLS and Mbed Crypto is limited. We are only aiming to provide protection
+against publicly documented attacks.
+
+**Warning!** Block ciphers constitute an exception from this protection. For
+details and workarounds see the section below.
+
+#### Block Ciphers
+
+Currently there are 4 block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES.
+The Mbed TLS implementation uses lookup tables, which are vulnerable to timing
+attacks.
+
+**Workarounds:**
+
+- Turn on hardware acceleration for AES. This is supported only on selected
+  architectures and currently only available for AES. See configuration options
+  `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details.
+- Add a secure alternative implementation (typically bitslice implementation or
+  hardware acceleration) for the vulnerable cipher. See the [Alternative
+Implementations Guide](docs/architecture/alternative-implementations.md) for
+  more information.
+- Instead of a block cipher, use ChaCha20/Poly1305 for encryption and data
+  origin authentication.
+
+### Physical attacks
+
+Physical attacks are out of scope. Any attack using information about or
+influencing the physical state of the hardware is considered physical,
+independently of the attack vector. (For example Row Hammer and Screaming
+Channels are considered physical attacks.) If physical attacks are present in a
+use case or a user application's threat model, it needs to be mitigated by
+physical countermeasures.

From 24792d0a33b6283b8c84043d029698a3acb7251e Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Fri, 3 Mar 2023 14:16:12 +0000
Subject: [PATCH 13/53] Threat Model: Improve wording

Signed-off-by: Janos Follath <janos.follath@arm.com>

Co-authored-by: Dave Rodgman <dave.rodgman@arm.com>
---
 SECURITY.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index ae37dab778..50c8ffd980 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -26,8 +26,8 @@ We use the following classification of attacks:
 - **Remote Attacks:** The attacker can observe and modify data sent over the
   network. This includes observing timing of individual packets and potentially
   delaying legitimate messages.
-- **Timing Attacks:** The attacker can gain information about the time certain
-  sets of instructions in Mbed TLS operations take.
+- **Timing Attacks:** The attacker can gain information about the time taken
+  by certain sets of instructions in Mbed TLS operations.
 - **Physical Attacks:** The attacker has access to physical information about
   the hardware Mbed TLS is running on and/or can alter the physical state of
   the hardware.
@@ -47,14 +47,14 @@ Mbed TLS and Mbed Crypto provide limited protection against timing attacks. The
 cost of protecting against timing attacks widely varies depending on the
 granularity of the measurements and the noise present. Therefore the protection
 in Mbed TLS and Mbed Crypto is limited. We are only aiming to provide protection
-against publicly documented attacks.
+against publicly documented attacks, and this protection is not currently complete.
 
-**Warning!** Block ciphers constitute an exception from this protection. For
+**Warning!** Block ciphers do not yet achieve full protection. For
 details and workarounds see the section below.
 
 #### Block Ciphers
 
-Currently there are 4 block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES.
+Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES.
 The Mbed TLS implementation uses lookup tables, which are vulnerable to timing
 attacks.
 
@@ -63,7 +63,7 @@ attacks.
 - Turn on hardware acceleration for AES. This is supported only on selected
   architectures and currently only available for AES. See configuration options
   `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details.
-- Add a secure alternative implementation (typically bitslice implementation or
+- Add a secure alternative implementation (typically a bitsliced implementation or
   hardware acceleration) for the vulnerable cipher. See the [Alternative
 Implementations Guide](docs/architecture/alternative-implementations.md) for
   more information.

From 144dd7d2fa1c2dc655064ef28de91f7042a36881 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Fri, 3 Mar 2023 14:56:38 +0000
Subject: [PATCH 14/53] Threat Model: Miscellaneous clarifications

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 53 ++++++++++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 50c8ffd980..4ed9d3807c 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -24,8 +24,8 @@ Users are urged to always use the latest version of a maintained branch.
 We use the following classification of attacks:
 
 - **Remote Attacks:** The attacker can observe and modify data sent over the
-  network. This includes observing timing of individual packets and potentially
-  delaying legitimate messages.
+  network. This includes observing the content and timing of individual packets,
+  as well as suppressing or delaying legitimate messages, and injecting messages.
 - **Timing Attacks:** The attacker can gain information about the time taken
   by certain sets of instructions in Mbed TLS operations.
 - **Physical Attacks:** The attacker has access to physical information about
@@ -34,20 +34,19 @@ We use the following classification of attacks:
 
 ### Remote attacks
 
-Mbed TLS aims to fully protect against remote attacks. Mbed Crypto aims to
-enable the user application in providing full protection against remote
-attacks. Said protection is limited to providing security guarantees offered by
-the protocol in question. (For example Mbed TLS alone won't guarantee that the
-messages will arrive without delay, as the TLS protocol doesn't guarantee that
-either.)
+Mbed TLS aims to fully protect against remote attacks and to enable the user
+application in providing full protection against remote attacks. Said
+protection is limited to providing security guarantees offered by the protocol
+in question. (For example Mbed TLS alone won't guarantee that the messages will
+arrive without delay, as the TLS protocol doesn't guarantee that either.)
 
 ### Timing attacks
 
-Mbed TLS and Mbed Crypto provide limited protection against timing attacks. The
-cost of protecting against timing attacks widely varies depending on the
-granularity of the measurements and the noise present. Therefore the protection
-in Mbed TLS and Mbed Crypto is limited. We are only aiming to provide protection
-against publicly documented attacks, and this protection is not currently complete.
+Mbed TLS provides limited protection against timing attacks. The cost of
+protecting against timing attacks widely varies depending on the granularity of
+the measurements and the noise present. Therefore the protection in Mbed TLS is
+limited. We are only aiming to provide protection against publicly documented
+attacks, and this protection is not currently complete.
 
 **Warning!** Block ciphers do not yet achieve full protection. For
 details and workarounds see the section below.
@@ -55,26 +54,26 @@ details and workarounds see the section below.
 #### Block Ciphers
 
 Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES.
-The Mbed TLS implementation uses lookup tables, which are vulnerable to timing
-attacks.
+The pure software implementation in Mbed TLS implementation uses lookup tables,
+which are vulnerable to timing attacks.
 
 **Workarounds:**
 
 - Turn on hardware acceleration for AES. This is supported only on selected
   architectures and currently only available for AES. See configuration options
   `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details.
-- Add a secure alternative implementation (typically a bitsliced implementation or
-  hardware acceleration) for the vulnerable cipher. See the [Alternative
-Implementations Guide](docs/architecture/alternative-implementations.md) for
-  more information.
-- Instead of a block cipher, use ChaCha20/Poly1305 for encryption and data
-  origin authentication.
+- Add a secure alternative implementation (typically hardware acceleration) for
+  the vulnerable cipher. See the [Alternative Implementations
+Guide](docs/architecture/alternative-implementations.md) for more information.
+- Use cryptographic mechanisms that are not based on block ciphers. In
+  particular, for authenticated encryption, use ChaCha20/Poly1305 instead of
+  block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG.
 
 ### Physical attacks
 
-Physical attacks are out of scope. Any attack using information about or
-influencing the physical state of the hardware is considered physical,
-independently of the attack vector. (For example Row Hammer and Screaming
-Channels are considered physical attacks.) If physical attacks are present in a
-use case or a user application's threat model, it needs to be mitigated by
-physical countermeasures.
+Physical attacks are out of scope (eg. power analysis or radio emissions). Any
+attack using information about or influencing the physical state of the
+hardware is considered physical, independently of the attack vector. (For
+example Row Hammer and Screaming Channels are considered physical attacks.) If
+physical attacks are present in a use case or a user application's threat
+model, it needs to be mitigated by physical countermeasures.

From 9ec195c984ba6978443086a5a7e924068210ee4d Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 6 Mar 2023 14:54:59 +0000
Subject: [PATCH 15/53] Threat Model: reorganise threat definitions

Simplify organisation by placing threat definitions in their respective
sections.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 4ed9d3807c..7981a44b64 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -23,17 +23,12 @@ Users are urged to always use the latest version of a maintained branch.
 
 We use the following classification of attacks:
 
-- **Remote Attacks:** The attacker can observe and modify data sent over the
-  network. This includes observing the content and timing of individual packets,
-  as well as suppressing or delaying legitimate messages, and injecting messages.
-- **Timing Attacks:** The attacker can gain information about the time taken
-  by certain sets of instructions in Mbed TLS operations.
-- **Physical Attacks:** The attacker has access to physical information about
-  the hardware Mbed TLS is running on and/or can alter the physical state of
-  the hardware.
-
 ### Remote attacks
 
+The attacker can observe and modify data sent over the network. This includes
+observing the content and timing of individual packets, as well as suppressing
+or delaying legitimate messages, and injecting messages.
+
 Mbed TLS aims to fully protect against remote attacks and to enable the user
 application in providing full protection against remote attacks. Said
 protection is limited to providing security guarantees offered by the protocol
@@ -42,6 +37,9 @@ arrive without delay, as the TLS protocol doesn't guarantee that either.)
 
 ### Timing attacks
 
+The attacker can gain information about the time taken by certain sets of
+instructions in Mbed TLS operations.
+
 Mbed TLS provides limited protection against timing attacks. The cost of
 protecting against timing attacks widely varies depending on the granularity of
 the measurements and the noise present. Therefore the protection in Mbed TLS is
@@ -71,6 +69,9 @@ Guide](docs/architecture/alternative-implementations.md) for more information.
 
 ### Physical attacks
 
+The attacker has access to physical information about the hardware Mbed TLS is
+running on and/or can alter the physical state of the hardware.
+
 Physical attacks are out of scope (eg. power analysis or radio emissions). Any
 attack using information about or influencing the physical state of the
 hardware is considered physical, independently of the attack vector. (For

From fef82fd39b7d3d6e6e34ad336a331ee6dbbfd8bb Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Wed, 8 Mar 2023 16:10:39 +0000
Subject: [PATCH 16/53] Threat Model: increase classification detail

Originally for the sake of simplicity there was a single category for
software based attacks, namely timing side channel attacks.

Be more precise and categorise attacks as software based whether or not
they rely on physical information.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 54 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 7981a44b64..c6345d65c8 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -35,22 +35,33 @@ protection is limited to providing security guarantees offered by the protocol
 in question. (For example Mbed TLS alone won't guarantee that the messages will
 arrive without delay, as the TLS protocol doesn't guarantee that either.)
 
-### Timing attacks
+### Local attacks
+
+The attacker is capable of running code on the same hardware as Mbed TLS, but
+there is still a security boundary between them (ie. the attacker can't for
+example read secrets from Mbed TLS' memory directly).
+
+#### Timing attacks
 
 The attacker can gain information about the time taken by certain sets of
-instructions in Mbed TLS operations.
+instructions in Mbed TLS operations. (See for example the [Flush+Reload
+paper](https://eprint.iacr.org/2013/448.pdf).)
+
+(Technically, timing information can be observed over the network or through
+physical side channels as well. Network timing attacks are less powerful than
+local and countermeasures protecting against local attacks prevent network
+attacks as well. If the timing information is gained through physical side
+channels, we consider them physical attacks and as such they are out of scope.)
 
 Mbed TLS provides limited protection against timing attacks. The cost of
 protecting against timing attacks widely varies depending on the granularity of
 the measurements and the noise present. Therefore the protection in Mbed TLS is
-limited. We are only aiming to provide protection against publicly documented
-attacks, and this protection is not currently complete.
+limited. We are only aiming to provide protection against **publicly
+documented** attacks, and this protection is not currently complete.
 
 **Warning!** Block ciphers do not yet achieve full protection. For
 details and workarounds see the section below.
 
-#### Block Ciphers
-
 Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES.
 The pure software implementation in Mbed TLS implementation uses lookup tables,
 which are vulnerable to timing attacks.
@@ -67,14 +78,35 @@ Guide](docs/architecture/alternative-implementations.md) for more information.
   particular, for authenticated encryption, use ChaCha20/Poly1305 instead of
   block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG.
 
+#### Local non-timing side channels
+
+The attacker code running on the platform has access to some sensor capable of
+picking up information on the physical state of the hardware while Mbed TLS is
+running. This can for example be any analogue to digital converter on the
+platform that is located unfortunately enough to pick up the CPU noise. (See
+for example the [Leaky Noise
+paper](https://tches.iacr.org/index.php/TCHES/article/view/8297).)
+
+Mbed TLS doesn't offer any security guarantees against local non-timing based
+side channel attacks. If local non-timing attacks are present in a use case or
+a user application's threat model, it needs to be mitigated by the platform.
+
+#### Local fault injection attacks
+
+Software running on the same hardware can affect the physical state of the
+device and introduce faults. (See for example the [Row Hammer
+paper](https://users.ece.cmu.edu/~yoonguk/papers/kim-isca14.pdf).)
+
+Mbed TLS doesn't offer any security guarantees against local fault injection
+attacks. If local fault injection attacks are present in a use case or a user
+application's threat model, it needs to be mitigated by the platform.
+
 ### Physical attacks
 
 The attacker has access to physical information about the hardware Mbed TLS is
-running on and/or can alter the physical state of the hardware.
+running on and/or can alter the physical state of the hardware (eg. power
+analysis, radio emissions or fault injection).
 
-Physical attacks are out of scope (eg. power analysis or radio emissions). Any
-attack using information about or influencing the physical state of the
-hardware is considered physical, independently of the attack vector. (For
-example Row Hammer and Screaming Channels are considered physical attacks.) If
+Mbed TLS doesn't offer any security guarantees against physical attacks. If
 physical attacks are present in a use case or a user application's threat
 model, it needs to be mitigated by physical countermeasures.

From ecaa293d32008a57eaf0b4b8af5eac769e86768d Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Wed, 8 Mar 2023 16:38:07 +0000
Subject: [PATCH 17/53] Threat model: explain dangling countermeasures

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/SECURITY.md b/SECURITY.md
index c6345d65c8..95e549f44e 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -110,3 +110,16 @@ analysis, radio emissions or fault injection).
 Mbed TLS doesn't offer any security guarantees against physical attacks. If
 physical attacks are present in a use case or a user application's threat
 model, it needs to be mitigated by physical countermeasures.
+
+### Caveats
+
+#### Out of scope countermeasures
+
+Mbed TLS has evolved organically and a well defined threat model hasn't always
+been present. Therefore, Mbed TLS might have countermeasures against attacks
+outside the above defined threat model.
+
+The presence of such countermeasures don't mean that Mbed TLS provides
+protection against a class of attacks outside of the above described threat
+model. Neither does it mean that the failure of such a countermeasure is
+considered a vulnerability.

From 3d377605f3afa0499af1720a39c951362f1b573d Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Wed, 8 Mar 2023 16:53:50 +0000
Subject: [PATCH 18/53] Threat Model: move the block cipher section

The block cipher exception affects both remote and local timing attacks.
Move them to the Caveats section and reference it from both the local
and the remote attack section.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 95e549f44e..677e68555d 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -35,6 +35,11 @@ protection is limited to providing security guarantees offered by the protocol
 in question. (For example Mbed TLS alone won't guarantee that the messages will
 arrive without delay, as the TLS protocol doesn't guarantee that either.)
 
+**Warning!** Depending on network latency, the timing of messages might be
+enough to launch some timing attacks. Block ciphers do not yet achieve full
+protection against these. For details and workarounds see the [Block
+Ciphers](#block-ciphers) section.
+
 ### Local attacks
 
 The attacker is capable of running code on the same hardware as Mbed TLS, but
@@ -60,23 +65,7 @@ limited. We are only aiming to provide protection against **publicly
 documented** attacks, and this protection is not currently complete.
 
 **Warning!** Block ciphers do not yet achieve full protection. For
-details and workarounds see the section below.
-
-Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and DES.
-The pure software implementation in Mbed TLS implementation uses lookup tables,
-which are vulnerable to timing attacks.
-
-**Workarounds:**
-
-- Turn on hardware acceleration for AES. This is supported only on selected
-  architectures and currently only available for AES. See configuration options
-  `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details.
-- Add a secure alternative implementation (typically hardware acceleration) for
-  the vulnerable cipher. See the [Alternative Implementations
-Guide](docs/architecture/alternative-implementations.md) for more information.
-- Use cryptographic mechanisms that are not based on block ciphers. In
-  particular, for authenticated encryption, use ChaCha20/Poly1305 instead of
-  block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG.
+details and workarounds see the [Block Ciphers](#block-ciphers) section.
 
 #### Local non-timing side channels
 
@@ -123,3 +112,24 @@ The presence of such countermeasures don't mean that Mbed TLS provides
 protection against a class of attacks outside of the above described threat
 model. Neither does it mean that the failure of such a countermeasure is
 considered a vulnerability.
+
+#### Block ciphers
+
+Currently there are four block ciphers in Mbed TLS: AES, CAMELLIA, ARIA and
+DES. The pure software implementation in Mbed TLS implementation uses lookup
+tables, which are vulnerable to timing attacks.
+
+These timing attacks can be physical, local or depending on network latency
+even a remote. The attacks can result in key recovery.
+
+**Workarounds:**
+
+- Turn on hardware acceleration for AES. This is supported only on selected
+  architectures and currently only available for AES. See configuration options
+  `MBEDTLS_AESCE_C`, `MBEDTLS_AESNI_C` and `MBEDTLS_PADLOCK_C` for details.
+- Add a secure alternative implementation (typically hardware acceleration) for
+  the vulnerable cipher. See the [Alternative Implementations
+Guide](docs/architecture/alternative-implementations.md) for more information.
+- Use cryptographic mechanisms that are not based on block ciphers. In
+  particular, for authenticated encryption, use ChaCha20/Poly1305 instead of
+  block cipher modes. For random generation, use HMAC\_DRBG instead of CTR\_DRBG.

From d5a09400ae23c949bfcc935dcd317eefe134d163 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Wed, 8 Mar 2023 19:58:29 +0000
Subject: [PATCH 19/53] Threat Model: improve wording

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 677e68555d..d0281ace93 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -42,14 +42,14 @@ Ciphers](#block-ciphers) section.
 
 ### Local attacks
 
-The attacker is capable of running code on the same hardware as Mbed TLS, but
-there is still a security boundary between them (ie. the attacker can't for
-example read secrets from Mbed TLS' memory directly).
+The attacker can run software on the same machine. The attacker has
+insufficient privileges to directly access Mbed TLS assets such as memory and
+files.
 
 #### Timing attacks
 
-The attacker can gain information about the time taken by certain sets of
-instructions in Mbed TLS operations. (See for example the [Flush+Reload
+The attacker is able to observe the timing of instructions executed by Mbed
+TLS.(See for example the [Flush+Reload
 paper](https://eprint.iacr.org/2013/448.pdf).)
 
 (Technically, timing information can be observed over the network or through

From 042e433edad41e8dbba3d4833bfe2e9b05ef828d Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Wed, 8 Mar 2023 20:07:59 +0000
Subject: [PATCH 20/53] Threat Model: clarify attack vectors

Timing attacks can be launched by any of the main 3 attackers. Clarify
exactly how these are covered.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index d0281ace93..387221e61f 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -52,17 +52,16 @@ The attacker is able to observe the timing of instructions executed by Mbed
 TLS.(See for example the [Flush+Reload
 paper](https://eprint.iacr.org/2013/448.pdf).)
 
-(Technically, timing information can be observed over the network or through
-physical side channels as well. Network timing attacks are less powerful than
-local and countermeasures protecting against local attacks prevent network
-attacks as well. If the timing information is gained through physical side
-channels, we consider them physical attacks and as such they are out of scope.)
-
 Mbed TLS provides limited protection against timing attacks. The cost of
 protecting against timing attacks widely varies depending on the granularity of
 the measurements and the noise present. Therefore the protection in Mbed TLS is
 limited. We are only aiming to provide protection against **publicly
-documented** attacks, and this protection is not currently complete.
+documented** attacks.
+
+**Remark:** Timing information can be observed over the network or through
+physical side channels as well. Remote and physical timing attacks are covered
+in the [Remote attacks](remote-attacks) and [Physical
+attacks](physical-attacks) sections respectively.
 
 **Warning!** Block ciphers do not yet achieve full protection. For
 details and workarounds see the [Block Ciphers](#block-ciphers) section.

From d4f31c87d1cc93d8c4b830e413524cb458b89d9d Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 10 Mar 2023 22:21:47 +0100
Subject: [PATCH 21/53] Update bibliographic references

There are new versions of the Intel whitepapers and they've moved.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index f6b304d157..2d5ed9e0ca 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -18,8 +18,8 @@
  */
 
 /*
- * [AES-WP] http://software.intel.com/en-us/articles/intel-advanced-encryption-standard-aes-instructions-set
- * [CLMUL-WP] http://software.intel.com/en-us/articles/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode/
+ * [AES-WP] https://www.intel.com/content/www/us/en/developer/articles/tool/intel-advanced-encryption-standard-aes-instructions-set.html
+ * [CLMUL-WP] https://www.intel.com/content/www/us/en/develop/download/intel-carry-less-multiplication-instruction-and-its-usage-for-computing-the-gcm-mode.html
  */
 
 #include "common.h"
@@ -152,7 +152,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
 
          /*
           * Caryless multiplication xmm2:xmm1 = xmm0 * xmm1
-          * using [CLMUL-WP] algorithm 1 (p. 13).
+          * using [CLMUL-WP] algorithm 1 (p. 12).
           */
          "movdqa %%xmm1, %%xmm2             \n\t" // copy of b1:b0
          "movdqa %%xmm1, %%xmm3             \n\t" // same
@@ -170,7 +170,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
 
          /*
           * Now shift the result one bit to the left,
-          * taking advantage of [CLMUL-WP] eq 27 (p. 20)
+          * taking advantage of [CLMUL-WP] eq 27 (p. 18)
           */
                              "movdqa %%xmm1, %%xmm3             \n\t" // r1:r0
                              "movdqa %%xmm2, %%xmm4             \n\t" // r3:r2
@@ -188,7 +188,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
 
          /*
           * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
-          * using [CLMUL-WP] algorithm 5 (p. 20).
+          * using [CLMUL-WP] algorithm 5 (p. 18).
           * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted).
           */
          /* Step 2 (1) */

From d0f9b0bacc182de20dfcd86983ce7747a4215bb5 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 10 Mar 2023 22:25:13 +0100
Subject: [PATCH 22/53] Don't warn about Msan/Valgrind if AESNI isn't actually
 built

The warning is only correct if the assembly code for AESNI is built, not if
MBEDTLS_AESNI_C is activated but MBEDTLS_HAVE_ASM is disabled or the target
architecture isn't x86_64.

This is a partial fix for #7236.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index 2d5ed9e0ca..786dbd92fc 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -26,13 +26,6 @@
 
 #if defined(MBEDTLS_AESNI_C)
 
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
-#warning \
-    "MBEDTLS_AESNI_C is known to cause spurious error reports with some memory sanitizers as they do not understand the assembly code."
-#endif
-#endif
-
 #include "aesni.h"
 
 #include <string.h>
@@ -59,6 +52,13 @@ int mbedtls_aesni_has_support(unsigned int what)
     return (c & what) != 0;
 }
 
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#warning \
+    "MBEDTLS_AESNI_C is known to cause spurious error reports with some memory sanitizers as they do not understand the assembly code."
+#endif
+#endif
+
 /*
  * Binutils needs to be at least 2.19 to support AES-NI instructions.
  * Unfortunately, a lot of users have a lower version now (2014-04).

From c51a413c473819cc83e73aeddecbafecddf528ca Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 14 Mar 2023 12:47:27 +0000
Subject: [PATCH 23/53] Threat Model: improve wording and grammar

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 387221e61f..dcffa1d9be 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -21,7 +21,7 @@ Users are urged to always use the latest version of a maintained branch.
 
 ## Threat model
 
-We use the following classification of attacks:
+We classify attacks based on the capabilities of the attacker.
 
 ### Remote attacks
 
@@ -32,13 +32,13 @@ or delaying legitimate messages, and injecting messages.
 Mbed TLS aims to fully protect against remote attacks and to enable the user
 application in providing full protection against remote attacks. Said
 protection is limited to providing security guarantees offered by the protocol
-in question. (For example Mbed TLS alone won't guarantee that the messages will
-arrive without delay, as the TLS protocol doesn't guarantee that either.)
+being implemented. (For example Mbed TLS alone won't guarantee that the
+messages will arrive without delay, as the TLS protocol doesn't guarantee that
+either.)
 
-**Warning!** Depending on network latency, the timing of messages might be
-enough to launch some timing attacks. Block ciphers do not yet achieve full
-protection against these. For details and workarounds see the [Block
-Ciphers](#block-ciphers) section.
+**Warning!** Block ciphers do not yet achieve full protection against attackers
+who can measure the timing of packets with sufficient precision. For details
+and workarounds see the [Block Ciphers](#block-ciphers) section.
 
 ### Local attacks
 
@@ -70,14 +70,14 @@ details and workarounds see the [Block Ciphers](#block-ciphers) section.
 
 The attacker code running on the platform has access to some sensor capable of
 picking up information on the physical state of the hardware while Mbed TLS is
-running. This can for example be any analogue to digital converter on the
+running. This could for example be an analogue-to-digital converter on the
 platform that is located unfortunately enough to pick up the CPU noise. (See
 for example the [Leaky Noise
 paper](https://tches.iacr.org/index.php/TCHES/article/view/8297).)
 
-Mbed TLS doesn't offer any security guarantees against local non-timing based
+Mbed TLS doesn't make any security guarantees against local non-timing-based
 side channel attacks. If local non-timing attacks are present in a use case or
-a user application's threat model, it needs to be mitigated by the platform.
+a user application's threat model, they need to be mitigated by the platform.
 
 #### Local fault injection attacks
 
@@ -85,23 +85,23 @@ Software running on the same hardware can affect the physical state of the
 device and introduce faults. (See for example the [Row Hammer
 paper](https://users.ece.cmu.edu/~yoonguk/papers/kim-isca14.pdf).)
 
-Mbed TLS doesn't offer any security guarantees against local fault injection
+Mbed TLS doesn't make any security guarantees against local fault injection
 attacks. If local fault injection attacks are present in a use case or a user
-application's threat model, it needs to be mitigated by the platform.
+application's threat model, they need to be mitigated by the platform.
 
 ### Physical attacks
 
 The attacker has access to physical information about the hardware Mbed TLS is
-running on and/or can alter the physical state of the hardware (eg. power
+running on and/or can alter the physical state of the hardware (e.g. power
 analysis, radio emissions or fault injection).
 
-Mbed TLS doesn't offer any security guarantees against physical attacks. If
+Mbed TLS doesn't make any security guarantees against physical attacks. If
 physical attacks are present in a use case or a user application's threat
-model, it needs to be mitigated by physical countermeasures.
+model, they need to be mitigated by physical countermeasures.
 
 ### Caveats
 
-#### Out of scope countermeasures
+#### Out-of-scope countermeasures
 
 Mbed TLS has evolved organically and a well defined threat model hasn't always
 been present. Therefore, Mbed TLS might have countermeasures against attacks

From 4317a9ef1f2e4f5d5c52c2952df1d2c0423b9c6b Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 14 Mar 2023 14:49:34 +0000
Subject: [PATCH 24/53] Threat Model: clarify stance on timing attacks

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index dcffa1d9be..97fe0e7475 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -48,15 +48,20 @@ files.
 
 #### Timing attacks
 
-The attacker is able to observe the timing of instructions executed by Mbed
-TLS.(See for example the [Flush+Reload
-paper](https://eprint.iacr.org/2013/448.pdf).)
+The attacker is able to observe the timing of instructions executed by Mbed TLS
+by leveraging shared hardware that both Mbed TLS and the attacker have access
+to. Typical attack vectors include cache timings, memory bus contention and
+branch prediction.
 
 Mbed TLS provides limited protection against timing attacks. The cost of
 protecting against timing attacks widely varies depending on the granularity of
 the measurements and the noise present. Therefore the protection in Mbed TLS is
 limited. We are only aiming to provide protection against **publicly
-documented** attacks.
+documented attack techniques**.
+
+As attacks keep improving, so does Mbed TLS's protection. Mbed TLS is moving
+towards a model of fully timing-invariant code, but has not reached this point
+yet.
 
 **Remark:** Timing information can be observed over the network or through
 physical side channels as well. Remote and physical timing attacks are covered

From ba75955cd80d57314ad6ad16922dec35515cab74 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 14 Mar 2023 14:54:44 +0000
Subject: [PATCH 25/53] Threat Model: remove references

Remove references to scientific papers as they are too specific and
might be misleading.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 97fe0e7475..8d2337111c 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -76,9 +76,7 @@ details and workarounds see the [Block Ciphers](#block-ciphers) section.
 The attacker code running on the platform has access to some sensor capable of
 picking up information on the physical state of the hardware while Mbed TLS is
 running. This could for example be an analogue-to-digital converter on the
-platform that is located unfortunately enough to pick up the CPU noise. (See
-for example the [Leaky Noise
-paper](https://tches.iacr.org/index.php/TCHES/article/view/8297).)
+platform that is located unfortunately enough to pick up the CPU noise.
 
 Mbed TLS doesn't make any security guarantees against local non-timing-based
 side channel attacks. If local non-timing attacks are present in a use case or
@@ -87,8 +85,7 @@ a user application's threat model, they need to be mitigated by the platform.
 #### Local fault injection attacks
 
 Software running on the same hardware can affect the physical state of the
-device and introduce faults. (See for example the [Row Hammer
-paper](https://users.ece.cmu.edu/~yoonguk/papers/kim-isca14.pdf).)
+device and introduce faults.
 
 Mbed TLS doesn't make any security guarantees against local fault injection
 attacks. If local fault injection attacks are present in a use case or a user

From 9118bf5791306398f4a8d5be79b303de62b3d0f6 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 14 Mar 2023 15:43:24 +0000
Subject: [PATCH 26/53] Threat Model: adjust modality

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 8d2337111c..8d3678a5ee 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -25,9 +25,10 @@ We classify attacks based on the capabilities of the attacker.
 
 ### Remote attacks
 
-The attacker can observe and modify data sent over the network. This includes
-observing the content and timing of individual packets, as well as suppressing
-or delaying legitimate messages, and injecting messages.
+In this section, we consider an attacker who can observe and modify data sent
+over the network. This includes observing the content and timing of individual
+packets, as well as suppressing or delaying legitimate messages, and injecting
+messages.
 
 Mbed TLS aims to fully protect against remote attacks and to enable the user
 application in providing full protection against remote attacks. Said
@@ -42,9 +43,9 @@ and workarounds see the [Block Ciphers](#block-ciphers) section.
 
 ### Local attacks
 
-The attacker can run software on the same machine. The attacker has
-insufficient privileges to directly access Mbed TLS assets such as memory and
-files.
+In this section, we consider an attacker who can run software on the same
+machine. The attacker has insufficient privileges to directly access Mbed TLS
+assets such as memory and files.
 
 #### Timing attacks
 
@@ -93,9 +94,10 @@ application's threat model, they need to be mitigated by the platform.
 
 ### Physical attacks
 
-The attacker has access to physical information about the hardware Mbed TLS is
-running on and/or can alter the physical state of the hardware (e.g. power
-analysis, radio emissions or fault injection).
+In this section, we consider an attacker who can attacker has access to
+physical information about the hardware Mbed TLS is running on and/or can alter
+the physical state of the hardware (e.g. power analysis, radio emissions or
+fault injection).
 
 Mbed TLS doesn't make any security guarantees against physical attacks. If
 physical attacks are present in a use case or a user application's threat

From 0086f8626a7fbd82625d6696b621d1e2ad56edad Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Wed, 15 Mar 2023 13:31:48 +0000
Subject: [PATCH 27/53] Add changelog entry

PR7083 silently fixed a security vulnerability in public, this commit
adds a changelog entry for it.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/fix-overread-in-tls13-debug.txt | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 ChangeLog.d/fix-overread-in-tls13-debug.txt

diff --git a/ChangeLog.d/fix-overread-in-tls13-debug.txt b/ChangeLog.d/fix-overread-in-tls13-debug.txt
new file mode 100644
index 0000000000..e089ce161e
--- /dev/null
+++ b/ChangeLog.d/fix-overread-in-tls13-debug.txt
@@ -0,0 +1,3 @@
+Security
+   * Fix a potential heap buffer overread in TLS 1.3 client-side when
+     MBEDTLS_DEBUG_C is enabled. This may result in an application crash.

From 4e201448824a2cd951598733513cdb5ce285c1fe Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Wed, 15 Mar 2023 19:36:03 +0100
Subject: [PATCH 28/53] Improve the presentation of assembly blocks

Uncrustify indents
```
    asm("foo"
        HELLO "bar"
              "wibble");
```
but we would like
```
    asm("foo"
        HELLO "bar"
        "wibble");
```
Make "bar" an argument of the macro HELLO, which makes the indentation from
uncrustify match the semantics (everything should be aligned to the same
column).

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.c | 236 ++++++++++++++++++++++++------------------------
 1 file changed, 118 insertions(+), 118 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index 786dbd92fc..77243ea387 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -69,13 +69,13 @@ int mbedtls_aesni_has_support(unsigned int what)
  * Operand macros are in gas order (src, dst) as opposed to Intel order
  * (dst, src) in order to blend better into the surrounding assembly code.
  */
-#define AESDEC      ".byte 0x66,0x0F,0x38,0xDE,"
-#define AESDECLAST  ".byte 0x66,0x0F,0x38,0xDF,"
-#define AESENC      ".byte 0x66,0x0F,0x38,0xDC,"
-#define AESENCLAST  ".byte 0x66,0x0F,0x38,0xDD,"
-#define AESIMC      ".byte 0x66,0x0F,0x38,0xDB,"
-#define AESKEYGENA  ".byte 0x66,0x0F,0x3A,0xDF,"
-#define PCLMULQDQ   ".byte 0x66,0x0F,0x3A,0x44,"
+#define AESDEC(regs)      ".byte 0x66,0x0F,0x38,0xDE," regs "\n\t"
+#define AESDECLAST(regs)  ".byte 0x66,0x0F,0x38,0xDF," regs "\n\t"
+#define AESENC(regs)      ".byte 0x66,0x0F,0x38,0xDC," regs "\n\t"
+#define AESENCLAST(regs)  ".byte 0x66,0x0F,0x38,0xDD," regs "\n\t"
+#define AESIMC(regs)      ".byte 0x66,0x0F,0x38,0xDB," regs "\n\t"
+#define AESKEYGENA(regs, imm)  ".byte 0x66,0x0F,0x3A,0xDF," regs "," imm "\n\t"
+#define PCLMULQDQ(regs, imm)   ".byte 0x66,0x0F,0x3A,0x44," regs "," imm "\n\t"
 
 #define xmm0_xmm0   "0xC0"
 #define xmm0_xmm1   "0xC8"
@@ -103,25 +103,25 @@ int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx,
 
          "1:                        \n\t" // encryption loop
          "movdqu    (%1), %%xmm1    \n\t" // load round key
-         AESENC     xmm1_xmm0      "\n\t" // do round
-                                   "add       $16, %1         \n\t" // point to next round key
-                                   "subl      $1, %0          \n\t" // loop
-                                   "jnz       1b              \n\t"
-                                   "movdqu    (%1), %%xmm1    \n\t" // load round key
-         AESENCLAST xmm1_xmm0      "\n\t" // last round
-                                   "jmp       3f              \n\t"
+         AESENC(xmm1_xmm0)                // do round
+         "add       $16, %1         \n\t" // point to next round key
+         "subl      $1, %0          \n\t" // loop
+         "jnz       1b              \n\t"
+         "movdqu    (%1), %%xmm1    \n\t" // load round key
+         AESENCLAST(xmm1_xmm0)            // last round
+         "jmp       3f              \n\t"
 
-                                   "2:                        \n\t" // decryption loop
-                                   "movdqu    (%1), %%xmm1    \n\t"
-         AESDEC     xmm1_xmm0      "\n\t" // do round
-                                   "add       $16, %1         \n\t"
-                                   "subl      $1, %0          \n\t"
-                                   "jnz       2b              \n\t"
-                                   "movdqu    (%1), %%xmm1    \n\t" // load round key
-         AESDECLAST xmm1_xmm0      "\n\t" // last round
+         "2:                        \n\t" // decryption loop
+         "movdqu    (%1), %%xmm1    \n\t"
+         AESDEC(xmm1_xmm0)                // do round
+         "add       $16, %1         \n\t"
+         "subl      $1, %0          \n\t"
+         "jnz       2b              \n\t"
+         "movdqu    (%1), %%xmm1    \n\t" // load round key
+         AESDECLAST(xmm1_xmm0)            // last round
 
-                                   "3:                        \n\t"
-                                   "movdqu    %%xmm0, (%4)    \n\t" // export output
+         "3:                        \n\t"
+         "movdqu    %%xmm0, (%4)    \n\t" // export output
          :
          : "r" (ctx->nr), "r" (ctx->buf + ctx->rk_offset), "r" (mode), "r" (input), "r" (output)
          : "memory", "cc", "xmm0", "xmm1");
@@ -157,34 +157,34 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
          "movdqa %%xmm1, %%xmm2             \n\t" // copy of b1:b0
          "movdqa %%xmm1, %%xmm3             \n\t" // same
          "movdqa %%xmm1, %%xmm4             \n\t" // same
-         PCLMULQDQ xmm0_xmm1 ",0x00         \n\t" // a0*b0 = c1:c0
-         PCLMULQDQ xmm0_xmm2 ",0x11         \n\t" // a1*b1 = d1:d0
-         PCLMULQDQ xmm0_xmm3 ",0x10         \n\t" // a0*b1 = e1:e0
-         PCLMULQDQ xmm0_xmm4 ",0x01         \n\t" // a1*b0 = f1:f0
-                             "pxor %%xmm3, %%xmm4               \n\t" // e1+f1:e0+f0
-                             "movdqa %%xmm4, %%xmm3             \n\t" // same
-                             "psrldq $8, %%xmm4                 \n\t" // 0:e1+f1
-                             "pslldq $8, %%xmm3                 \n\t" // e0+f0:0
-                             "pxor %%xmm4, %%xmm2               \n\t" // d1:d0+e1+f1
-                             "pxor %%xmm3, %%xmm1               \n\t" // c1+e0+f1:c0
+         PCLMULQDQ(xmm0_xmm1, "0x00")             // a0*b0 = c1:c0
+         PCLMULQDQ(xmm0_xmm2, "0x11")             // a1*b1 = d1:d0
+         PCLMULQDQ(xmm0_xmm3, "0x10")             // a0*b1 = e1:e0
+         PCLMULQDQ(xmm0_xmm4, "0x01")             // a1*b0 = f1:f0
+         "pxor %%xmm3, %%xmm4               \n\t" // e1+f1:e0+f0
+         "movdqa %%xmm4, %%xmm3             \n\t" // same
+         "psrldq $8, %%xmm4                 \n\t" // 0:e1+f1
+         "pslldq $8, %%xmm3                 \n\t" // e0+f0:0
+         "pxor %%xmm4, %%xmm2               \n\t" // d1:d0+e1+f1
+         "pxor %%xmm3, %%xmm1               \n\t" // c1+e0+f1:c0
 
          /*
           * Now shift the result one bit to the left,
           * taking advantage of [CLMUL-WP] eq 27 (p. 18)
           */
-                             "movdqa %%xmm1, %%xmm3             \n\t" // r1:r0
-                             "movdqa %%xmm2, %%xmm4             \n\t" // r3:r2
-                             "psllq $1, %%xmm1                  \n\t" // r1<<1:r0<<1
-                             "psllq $1, %%xmm2                  \n\t" // r3<<1:r2<<1
-                             "psrlq $63, %%xmm3                 \n\t" // r1>>63:r0>>63
-                             "psrlq $63, %%xmm4                 \n\t" // r3>>63:r2>>63
-                             "movdqa %%xmm3, %%xmm5             \n\t" // r1>>63:r0>>63
-                             "pslldq $8, %%xmm3                 \n\t" // r0>>63:0
-                             "pslldq $8, %%xmm4                 \n\t" // r2>>63:0
-                             "psrldq $8, %%xmm5                 \n\t" // 0:r1>>63
-                             "por %%xmm3, %%xmm1                \n\t" // r1<<1|r0>>63:r0<<1
-                             "por %%xmm4, %%xmm2                \n\t" // r3<<1|r2>>62:r2<<1
-                             "por %%xmm5, %%xmm2                \n\t" // r3<<1|r2>>62:r2<<1|r1>>63
+         "movdqa %%xmm1, %%xmm3             \n\t" // r1:r0
+         "movdqa %%xmm2, %%xmm4             \n\t" // r3:r2
+         "psllq $1, %%xmm1                  \n\t" // r1<<1:r0<<1
+         "psllq $1, %%xmm2                  \n\t" // r3<<1:r2<<1
+         "psrlq $63, %%xmm3                 \n\t" // r1>>63:r0>>63
+         "psrlq $63, %%xmm4                 \n\t" // r3>>63:r2>>63
+         "movdqa %%xmm3, %%xmm5             \n\t" // r1>>63:r0>>63
+         "pslldq $8, %%xmm3                 \n\t" // r0>>63:0
+         "pslldq $8, %%xmm4                 \n\t" // r2>>63:0
+         "psrldq $8, %%xmm5                 \n\t" // 0:r1>>63
+         "por %%xmm3, %%xmm1                \n\t" // r1<<1|r0>>63:r0<<1
+         "por %%xmm4, %%xmm2                \n\t" // r3<<1|r2>>62:r2<<1
+         "por %%xmm5, %%xmm2                \n\t" // r3<<1|r2>>62:r2<<1|r1>>63
 
          /*
           * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
@@ -192,44 +192,44 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
           * Currently xmm2:xmm1 holds x3:x2:x1:x0 (already shifted).
           */
          /* Step 2 (1) */
-                             "movdqa %%xmm1, %%xmm3             \n\t" // x1:x0
-                             "movdqa %%xmm1, %%xmm4             \n\t" // same
-                             "movdqa %%xmm1, %%xmm5             \n\t" // same
-                             "psllq $63, %%xmm3                 \n\t" // x1<<63:x0<<63 = stuff:a
-                             "psllq $62, %%xmm4                 \n\t" // x1<<62:x0<<62 = stuff:b
-                             "psllq $57, %%xmm5                 \n\t" // x1<<57:x0<<57 = stuff:c
+         "movdqa %%xmm1, %%xmm3             \n\t" // x1:x0
+         "movdqa %%xmm1, %%xmm4             \n\t" // same
+         "movdqa %%xmm1, %%xmm5             \n\t" // same
+         "psllq $63, %%xmm3                 \n\t" // x1<<63:x0<<63 = stuff:a
+         "psllq $62, %%xmm4                 \n\t" // x1<<62:x0<<62 = stuff:b
+         "psllq $57, %%xmm5                 \n\t" // x1<<57:x0<<57 = stuff:c
 
          /* Step 2 (2) */
-                             "pxor %%xmm4, %%xmm3               \n\t" // stuff:a+b
-                             "pxor %%xmm5, %%xmm3               \n\t" // stuff:a+b+c
-                             "pslldq $8, %%xmm3                 \n\t" // a+b+c:0
-                             "pxor %%xmm3, %%xmm1               \n\t" // x1+a+b+c:x0 = d:x0
+         "pxor %%xmm4, %%xmm3               \n\t" // stuff:a+b
+         "pxor %%xmm5, %%xmm3               \n\t" // stuff:a+b+c
+         "pslldq $8, %%xmm3                 \n\t" // a+b+c:0
+         "pxor %%xmm3, %%xmm1               \n\t" // x1+a+b+c:x0 = d:x0
 
          /* Steps 3 and 4 */
-                             "movdqa %%xmm1,%%xmm0              \n\t" // d:x0
-                             "movdqa %%xmm1,%%xmm4              \n\t" // same
-                             "movdqa %%xmm1,%%xmm5              \n\t" // same
-                             "psrlq $1, %%xmm0                  \n\t" // e1:x0>>1 = e1:e0'
-                             "psrlq $2, %%xmm4                  \n\t" // f1:x0>>2 = f1:f0'
-                             "psrlq $7, %%xmm5                  \n\t" // g1:x0>>7 = g1:g0'
-                             "pxor %%xmm4, %%xmm0               \n\t" // e1+f1:e0'+f0'
-                             "pxor %%xmm5, %%xmm0               \n\t" // e1+f1+g1:e0'+f0'+g0'
+         "movdqa %%xmm1,%%xmm0              \n\t" // d:x0
+         "movdqa %%xmm1,%%xmm4              \n\t" // same
+         "movdqa %%xmm1,%%xmm5              \n\t" // same
+         "psrlq $1, %%xmm0                  \n\t" // e1:x0>>1 = e1:e0'
+         "psrlq $2, %%xmm4                  \n\t" // f1:x0>>2 = f1:f0'
+         "psrlq $7, %%xmm5                  \n\t" // g1:x0>>7 = g1:g0'
+         "pxor %%xmm4, %%xmm0               \n\t" // e1+f1:e0'+f0'
+         "pxor %%xmm5, %%xmm0               \n\t" // e1+f1+g1:e0'+f0'+g0'
          // e0'+f0'+g0' is almost e0+f0+g0, ex\tcept for some missing
          // bits carried from d. Now get those\t bits back in.
-                             "movdqa %%xmm1,%%xmm3              \n\t" // d:x0
-                             "movdqa %%xmm1,%%xmm4              \n\t" // same
-                             "movdqa %%xmm1,%%xmm5              \n\t" // same
-                             "psllq $63, %%xmm3                 \n\t" // d<<63:stuff
-                             "psllq $62, %%xmm4                 \n\t" // d<<62:stuff
-                             "psllq $57, %%xmm5                 \n\t" // d<<57:stuff
-                             "pxor %%xmm4, %%xmm3               \n\t" // d<<63+d<<62:stuff
-                             "pxor %%xmm5, %%xmm3               \n\t" // missing bits of d:stuff
-                             "psrldq $8, %%xmm3                 \n\t" // 0:missing bits of d
-                             "pxor %%xmm3, %%xmm0               \n\t" // e1+f1+g1:e0+f0+g0
-                             "pxor %%xmm1, %%xmm0               \n\t" // h1:h0
-                             "pxor %%xmm2, %%xmm0               \n\t" // x3+h1:x2+h0
+         "movdqa %%xmm1,%%xmm3              \n\t" // d:x0
+         "movdqa %%xmm1,%%xmm4              \n\t" // same
+         "movdqa %%xmm1,%%xmm5              \n\t" // same
+         "psllq $63, %%xmm3                 \n\t" // d<<63:stuff
+         "psllq $62, %%xmm4                 \n\t" // d<<62:stuff
+         "psllq $57, %%xmm5                 \n\t" // d<<57:stuff
+         "pxor %%xmm4, %%xmm3               \n\t" // d<<63+d<<62:stuff
+         "pxor %%xmm5, %%xmm3               \n\t" // missing bits of d:stuff
+         "psrldq $8, %%xmm3                 \n\t" // 0:missing bits of d
+         "pxor %%xmm3, %%xmm0               \n\t" // e1+f1+g1:e0+f0+g0
+         "pxor %%xmm1, %%xmm0               \n\t" // h1:h0
+         "pxor %%xmm2, %%xmm0               \n\t" // x3+h1:x2+h0
 
-                             "movdqu %%xmm0, (%2)               \n\t" // done
+         "movdqu %%xmm0, (%2)               \n\t" // done
          :
          : "r" (aa), "r" (bb), "r" (cc)
          : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
@@ -255,8 +255,8 @@ void mbedtls_aesni_inverse_key(unsigned char *invkey,
 
     for (fk -= 16, ik += 16; fk > fwdkey; fk -= 16, ik += 16) {
         asm ("movdqu (%0), %%xmm0       \n\t"
-             AESIMC  xmm0_xmm0         "\n\t"
-                                       "movdqu %%xmm0, (%1)       \n\t"
+             AESIMC(xmm0_xmm0)
+             "movdqu %%xmm0, (%1)       \n\t"
              :
              : "r" (fk), "r" (ik)
              : "memory", "xmm0");
@@ -300,16 +300,16 @@ static void aesni_setkey_enc_128(unsigned char *rk,
 
          /* Main "loop" */
          "2:                                \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x01        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x02        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x04        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x08        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x10        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x20        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x40        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x80        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x1B        \n\tcall 1b \n\t"
-         AESKEYGENA xmm0_xmm1 ",0x36        \n\tcall 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x01")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x02")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x04")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x08")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x10")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x20")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x40")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x80")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x1B")      "call 1b \n\t"
+         AESKEYGENA(xmm0_xmm1, "0x36")      "call 1b \n\t"
          :
          : "r" (rk), "r" (key)
          : "memory", "cc", "0");
@@ -358,14 +358,14 @@ static void aesni_setkey_enc_192(unsigned char *rk,
          "ret                           \n\t"
 
          "2:                            \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x01    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x02    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x04    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x08    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x10    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x20    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x40    \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x80    \n\tcall 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x01")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x02")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x04")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x08")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x10")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x20")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x40")  "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x80")  "call 1b \n\t"
 
          :
          : "r" (rk), "r" (key)
@@ -408,31 +408,31 @@ static void aesni_setkey_enc_256(unsigned char *rk,
 
          /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
           * and proceed to generate next round key from there */
-         AESKEYGENA xmm0_xmm2 ",0x00        \n\t"
-                              "pshufd $0xaa, %%xmm2, %%xmm2      \n\t"
-                              "pxor %%xmm1, %%xmm2               \n\t"
-                              "pslldq $4, %%xmm1                 \n\t"
-                              "pxor %%xmm1, %%xmm2               \n\t"
-                              "pslldq $4, %%xmm1                 \n\t"
-                              "pxor %%xmm1, %%xmm2               \n\t"
-                              "pslldq $4, %%xmm1                 \n\t"
-                              "pxor %%xmm2, %%xmm1               \n\t"
-                              "add $16, %0                       \n\t"
-                              "movdqu %%xmm1, (%0)               \n\t"
-                              "ret                               \n\t"
+         AESKEYGENA(xmm0_xmm2, "0x00")
+         "pshufd $0xaa, %%xmm2, %%xmm2      \n\t"
+         "pxor %%xmm1, %%xmm2               \n\t"
+         "pslldq $4, %%xmm1                 \n\t"
+         "pxor %%xmm1, %%xmm2               \n\t"
+         "pslldq $4, %%xmm1                 \n\t"
+         "pxor %%xmm1, %%xmm2               \n\t"
+         "pslldq $4, %%xmm1                 \n\t"
+         "pxor %%xmm2, %%xmm1               \n\t"
+         "add $16, %0                       \n\t"
+         "movdqu %%xmm1, (%0)               \n\t"
+         "ret                               \n\t"
 
          /*
           * Main "loop" - Generating one more key than necessary,
           * see definition of mbedtls_aes_context.buf
           */
-                              "2:                                \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x01        \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x02        \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x04        \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x08        \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x10        \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x20        \n\tcall 1b \n\t"
-         AESKEYGENA xmm1_xmm2 ",0x40        \n\tcall 1b \n\t"
+         "2:                                \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x01")      "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x02")      "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x04")      "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x08")      "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x10")      "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x20")      "call 1b \n\t"
+         AESKEYGENA(xmm1_xmm2, "0x40")      "call 1b \n\t"
          :
          : "r" (rk), "r" (key)
          : "memory", "cc", "0");

From 9af58cd7f8e08a66b3e9b11106c31ab56be7e67f Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 10 Mar 2023 22:29:32 +0100
Subject: [PATCH 29/53] New preprocessor symbol indicating that AESNI support
 is present

The configuration symbol MBEDTLS_AESNI_C requests AESNI support, but it is
ignored if the platform doesn't have AESNI. This allows keeping
MBEDTLS_AESNI_C enabled (as it is in the default build) when building for
platforms other than x86_64, or when MBEDTLS_HAVE_ASM is disabled.

To facilitate maintenance, always use the symbol MBEDTLS_AESNI_HAVE_CODE to
answer the question "can I call mbedtls_aesni_xxx functions?", rather than
repeating the check `defined(MBEDTLS_AESNI_C) && ...`.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c   |  6 +++---
 library/aesni.h | 22 ++++++++++++++++++++--
 library/gcm.c   |  6 +++---
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index 64392fc56b..15b505f8ab 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -541,7 +541,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
 #endif
     RK = ctx->buf + ctx->rk_offset;
 
-#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
         return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits);
     }
@@ -653,7 +653,7 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key,
 
     ctx->nr = cty.nr;
 
-#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
         mbedtls_aesni_inverse_key((unsigned char *) RK,
                                   (const unsigned char *) (cty.buf + cty.rk_offset), ctx->nr);
@@ -957,7 +957,7 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
         return MBEDTLS_ERR_AES_BAD_INPUT_DATA;
     }
 
-#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
         return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
     }
diff --git a/library/aesni.h b/library/aesni.h
index a842fb703b..c1c4bdd8f8 100644
--- a/library/aesni.h
+++ b/library/aesni.h
@@ -32,13 +32,30 @@
 #define MBEDTLS_AESNI_AES      0x02000000u
 #define MBEDTLS_AESNI_CLMUL    0x00000002u
 
-#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) &&  \
+#if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) && \
     (defined(__amd64__) || defined(__x86_64__))   &&  \
     !defined(MBEDTLS_HAVE_X86_64)
 #define MBEDTLS_HAVE_X86_64
 #endif
 
+#if defined(MBEDTLS_AESNI_C)
+
 #if defined(MBEDTLS_HAVE_X86_64)
+#define MBEDTLS_AESNI_HAVE_CODE // via assembly
+#endif
+
+#if defined(_MSC_VER)
+#define MBEDTLS_HAVE_AESNI_INTRINSICS
+#endif
+#if defined(__GNUC__) && defined(__AES__)
+#define MBEDTLS_HAVE_AESNI_INTRINSICS
+#endif
+
+#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#define MBEDTLS_AESNI_HAVE_CODE // via intrinsics
+#endif
+
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
 
 #ifdef __cplusplus
 extern "C" {
@@ -127,6 +144,7 @@ int mbedtls_aesni_setkey_enc(unsigned char *rk,
 }
 #endif
 
-#endif /* MBEDTLS_HAVE_X86_64 */
+#endif /* MBEDTLS_AESNI_HAVE_CODE */
+#endif  /* MBEDTLS_AESNI_C */
 
 #endif /* MBEDTLS_AESNI_H */
diff --git a/library/gcm.c b/library/gcm.c
index 6d4495fd39..8e773d7e7d 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -86,7 +86,7 @@ static int gcm_gen_table(mbedtls_gcm_context *ctx)
     ctx->HL[8] = vl;
     ctx->HH[8] = vh;
 
-#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
     /* With CLMUL support, we need only h, not the rest of the table */
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
         return 0;
@@ -183,7 +183,7 @@ static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16],
     unsigned char lo, hi, rem;
     uint64_t zh, zl;
 
-#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
         unsigned char h[16];
 
@@ -195,7 +195,7 @@ static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16],
         mbedtls_aesni_gcm_mult(output, x, h);
         return;
     }
-#endif /* MBEDTLS_AESNI_C && MBEDTLS_HAVE_X86_64 */
+#endif /* MBEDTLS_AESNI_HAVE_CODE */
 
     lo = x[15] & 0xf;
 

From 7e67bd516db0cc6a9bcfad8567f2c1b8e1424b6a Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 10 Mar 2023 22:35:24 +0100
Subject: [PATCH 30/53] AES, GCM selftest: indicate which implementation is
 used

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c | 23 +++++++++++++++++++++++
 library/gcm.c | 14 ++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/library/aes.c b/library/aes.c
index 15b505f8ab..e41810a54d 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -1729,6 +1729,29 @@ int mbedtls_aes_self_test(int verbose)
     memset(key, 0, 32);
     mbedtls_aes_init(&ctx);
 
+    if (verbose != 0) {
+#if defined(MBEDTLS_AES_ALT)
+        mbedtls_printf("  AES note: alternative implementation.\n");
+#else /* MBEDTLS_AES_ALT */
+#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
+        if (mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE)) {
+            mbedtls_printf("  AES note: using VIA Padlock.\n");
+        } else
+#endif
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
+        if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+            mbedtls_printf("  AES note: using AESNI.\n");
+        } else
+#endif
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+        if (mbedtls_aesce_has_support()) {
+            mbedtls_printf("  AES note: using AESCE.\n");
+        } else
+#endif
+        mbedtls_printf("  AES note: built-in implementation.\n");
+#endif /* MBEDTLS_AES_ALT */
+    }
+
     /*
      * ECB mode
      */
diff --git a/library/gcm.c b/library/gcm.c
index 8e773d7e7d..4e8ec08815 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -845,6 +845,20 @@ int mbedtls_gcm_self_test(int verbose)
     mbedtls_cipher_id_t cipher = MBEDTLS_CIPHER_ID_AES;
     size_t olen;
 
+    if (verbose != 0)
+    {
+#if defined(MBEDTLS_GCM_ALT)
+        mbedtls_printf("  GCM note: alternative implementation.\n");
+#else /* MBEDTLS_GCM_ALT */
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
+        if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
+            mbedtls_printf("  GCM note: using AESNI.\n");
+        } else
+#endif
+        mbedtls_printf("  GCM note: built-in implementation.\n");
+#endif /* MBEDTLS_GCM_ALT */
+    }
+
     for (j = 0; j < 3; j++) {
         int key_len = 128 + 64 * j;
 

From d671917d0dd1dcc498ee384dacafdbad674d7c88 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 10 Mar 2023 22:37:11 +0100
Subject: [PATCH 31/53] AESNI: add implementation with intrinsics

As of this commit, to use the intrinsics for MBEDTLS_AESNI_C:

* With MSVC, this should be the default.
* With Clang, build with `clang -maes -mpclmul` or equivalent.
* With GCC, build with `gcc -mpclmul -msse2` or equivalent.

In particular, for now, with a GCC-like compiler, when building specifically
for a target that supports both the AES and GCM instructions, the old
implementation using assembly is selected.

This method for platform selection will likely be improved in the future.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c   |  17 +++
 library/aesni.c | 339 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 355 insertions(+), 1 deletion(-)

diff --git a/library/aes.c b/library/aes.c
index e41810a54d..69d4eadfa8 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -543,6 +543,13 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        /* The intrinsics-based implementation needs 16-byte alignment
+         * for the round key array. */
+        unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
+        if (delta != 0) {
+            ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
+        }
+        RK = ctx->buf + ctx->rk_offset;
         return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits);
     }
 #endif
@@ -643,6 +650,16 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key,
     if (aes_padlock_ace) {
         ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf;
     }
+#endif
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
+    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        /* The intrinsics-based implementation needs 16-byte alignment
+         * for the round key array. */
+        unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
+        if (delta != 0) {
+            ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
+        }
+    }
 #endif
     RK = ctx->buf + ctx->rk_offset;
 
diff --git a/library/aesni.c b/library/aesni.c
index 77243ea387..3acc329712 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -30,7 +30,12 @@
 
 #include <string.h>
 
-#if defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) || defined(MBEDTLS_HAVE_X86_64)
+
+#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#include <cpuid.h>
+#include <immintrin.h>
+#endif
 
 /*
  * AES-NI support detection routine
@@ -41,17 +46,347 @@ int mbedtls_aesni_has_support(unsigned int what)
     static unsigned int c = 0;
 
     if (!done) {
+#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+        static unsigned info[4] = { 0, 0, 0, 0 };
+#if defined(_MSC_VER)
+        __cpuid(info, 1);
+#else
+        __cpuid(1, info[0], info[1], info[2], info[3]);
+#endif
+        c = info[2];
+#else
         asm ("movl  $1, %%eax   \n\t"
              "cpuid             \n\t"
              : "=c" (c)
              :
              : "eax", "ebx", "edx");
+#endif
         done = 1;
     }
 
     return (c & what) != 0;
 }
 
+#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+
+/*
+ * AES-NI AES-ECB block en(de)cryption
+ */
+int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx,
+                            int mode,
+                            const unsigned char input[16],
+                            unsigned char output[16])
+{
+    const __m128i *rk = (const __m128i *) (ctx->buf + ctx->rk_offset);
+    unsigned nr = ctx->nr; // Number of remaining rounds
+    // Load round key 0
+    __m128i xmm0;
+    memcpy(&xmm0, input, 16);
+    xmm0 ^= *rk;
+    ++rk;
+    --nr;
+
+    if (mode == 0) {
+        while (nr != 0) {
+            xmm0 = _mm_aesdec_si128(xmm0, *rk);
+            ++rk;
+            --nr;
+        }
+        xmm0 = _mm_aesdeclast_si128(xmm0, *rk);
+    } else {
+        while (nr != 0) {
+            xmm0 = _mm_aesenc_si128(xmm0, *rk);
+            ++rk;
+            --nr;
+        }
+        xmm0 = _mm_aesenclast_si128(xmm0, *rk);
+    }
+
+    memcpy(output, &xmm0, 16);
+    return 0;
+}
+
+/*
+ * GCM multiplication: c = a times b in GF(2^128)
+ * Based on [CLMUL-WP] algorithms 1 (with equation 27) and 5.
+ */
+
+static void gcm_clmul(const __m128i aa, const __m128i bb,
+                      __m128i *cc, __m128i *dd)
+{
+    /*
+     * Caryless multiplication dd:cc = aa * bb
+     * using [CLMUL-WP] algorithm 1 (p. 12).
+     */
+    *cc = _mm_clmulepi64_si128(aa, bb, 0x00); // a0*b0 = c1:c0
+    *dd = _mm_clmulepi64_si128(aa, bb, 0x11); // a1*b1 = d1:d0
+    __m128i ee = _mm_clmulepi64_si128(aa, bb, 0x10); // a0*b1 = e1:e0
+    __m128i ff = _mm_clmulepi64_si128(aa, bb, 0x01); // a1*b0 = f1:f0
+    ff ^= ee;                                        // e1+f1:e0+f0
+    ee = ff;                                         // e1+f1:e0+f0
+    ff = _mm_srli_si128(ff, 8);                      // 0:e1+f1
+    ee = _mm_slli_si128(ee, 8);                      // e0+f0:0
+    *dd ^= ff;                                       // d1:d0+e1+f1
+    *cc ^= ee;                                       // c1+e0+f1:c0
+}
+
+static void gcm_shift(__m128i *cc, __m128i *dd)
+{
+    /*
+     * Now shift the result one bit to the left,
+     * taking advantage of [CLMUL-WP] eq 27 (p. 18)
+     */
+    //                                       // *cc = r1:r0
+    //                                       // *dd = r3:r2
+    __m128i xmm1 = _mm_slli_epi64(*cc, 1);   // r1<<1:r0<<1
+    __m128i xmm2 = _mm_slli_epi64(*dd, 1);   // r3<<1:r2<<1
+    __m128i xmm3 = _mm_srli_epi64(*cc, 63);  // r1>>63:r0>>63
+    __m128i xmm4 = _mm_srli_epi64(*dd, 63);  // r3>>63:r2>>63
+    __m128i xmm5 = _mm_srli_si128(xmm3, 8);  // 0:r1>>63
+    xmm3 = _mm_slli_si128(xmm3, 8);          // r0>>63:0
+    xmm4 = _mm_slli_si128(xmm4, 8);          // 0:r1>>63
+
+    *cc = xmm1 | xmm3;                       // r1<<1|r0>>63:r0<<1
+    *dd = xmm2 | xmm4 | xmm5;                // r3<<1|r2>>62:r2<<1|r1>>63
+}
+
+static __m128i gcm_reduce1(__m128i xx)
+{
+    //                                            // xx = x1:x0
+    /* [CLMUL-WP] Algorithm 5 Step 2 */
+    __m128i aa = _mm_slli_epi64(xx, 63);          // x1<<63:x0<<63 = stuff:a
+    __m128i bb = _mm_slli_epi64(xx, 62);          // x1<<62:x0<<62 = stuff:b
+    __m128i cc = _mm_slli_epi64(xx, 57);          // x1<<57:x0<<57 = stuff:c
+    __m128i dd = _mm_slli_si128(aa ^ bb ^ cc, 8); // a+b+c:0
+    return dd ^ xx;                               // x1+a+b+c:x0 = d:x0
+}
+
+static __m128i gcm_reduce2(__m128i dx)
+{
+    /* [CLMUL-WP] Algorithm 5 Steps 3 and 4 */
+    __m128i ee = _mm_srli_epi64(dx, 1);           // e1:x0>>1 = e1:e0'
+    __m128i ff = _mm_srli_epi64(dx, 2);           // f1:x0>>2 = f1:f0'
+    __m128i gg = _mm_srli_epi64(dx, 7);           // g1:x0>>7 = g1:g0'
+
+    // e0'+f0'+g0' is almost e0+f0+g0, except for some missing
+    // bits carried from d. Now get those bits back in.
+    __m128i eh = _mm_slli_epi64(dx, 63);          // d<<63:stuff
+    __m128i fh = _mm_slli_epi64(dx, 62);          // d<<62:stuff
+    __m128i gh = _mm_slli_epi64(dx, 57);          // d<<57:stuff
+    __m128i hh = _mm_srli_si128(eh ^ fh ^ gh, 8); // 0:missing bits of d
+
+    return ee ^ ff ^ gg ^ hh ^ dx;
+}
+
+void mbedtls_aesni_gcm_mult(unsigned char c[16],
+                            const unsigned char a[16],
+                            const unsigned char b[16])
+{
+    __m128i aa, bb, cc, dd;
+
+    /* The inputs are in big-endian order, so byte-reverse them */
+    for (size_t i = 0; i < 16; i++) {
+        ((uint8_t *) &aa)[i] = a[15 - i];
+        ((uint8_t *) &bb)[i] = b[15 - i];
+    }
+
+    gcm_clmul(aa, bb, &cc, &dd);
+    gcm_shift(&cc, &dd);
+    /*
+     * Now reduce modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1
+     * using [CLMUL-WP] algorithm 5 (p. 18).
+     * Currently dd:cc holds x3:x2:x1:x0 (already shifted).
+     */
+    __m128i dx = gcm_reduce1(cc);
+    __m128i xh = gcm_reduce2(dx);
+    cc = xh ^ dd; // x3+h1:x2+h0
+
+    /* Now byte-reverse the outputs */
+    for (size_t i = 0; i < 16; i++) {
+        c[i] = ((uint8_t *) &cc)[15 - i];
+    }
+
+    return;
+}
+
+/*
+ * Compute decryption round keys from encryption round keys
+ */
+void mbedtls_aesni_inverse_key(unsigned char *invkey,
+                               const unsigned char *fwdkey, int nr)
+{
+    __m128i *ik = (__m128i *) invkey;
+    const __m128i *fk = (const __m128i *) fwdkey + nr;
+
+    *ik = *fk;
+    for (--fk, ++ik; fk > (const __m128i *) fwdkey; --fk, ++ik) {
+        *ik = _mm_aesimc_si128(*fk);
+    }
+    *ik = *fk;
+}
+
+/*
+ * Key expansion, 128-bit case
+ */
+static __m128i aesni_set_rk_128(__m128i xmm0, __m128i xmm1)
+{
+    /*
+     * Finish generating the next round key.
+     *
+     * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
+     * with X = rot( sub( r3 ) ) ^ RCON.
+     *
+     * On exit, xmm1 is r7:r6:r5:r4
+     * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
+     * and this is returned, to be written to the round key buffer.
+     */
+    xmm1 = _mm_shuffle_epi32(xmm1, 0xff);   // X:X:X:X
+    xmm1 ^= xmm0;                           // X+r3:X+r2:X+r1:r4
+    xmm0 = _mm_slli_si128(xmm0, 4);         // r2:r1:r0:0
+    xmm1 ^= xmm0;                           // X+r3+r2:X+r2+r1:r5:r4
+    xmm0 = _mm_slli_si128(xmm0, 4);         // r1:r0:0:0
+    xmm1 ^= xmm0;                           // X+r3+r2+r1:r6:r5:r4
+    xmm0 = _mm_slli_si128(xmm0, 4);         // r0:0:0:0
+    xmm1 ^= xmm0;                           // r7:r6:r5:r4
+    return xmm1;
+}
+
+static void aesni_setkey_enc_128(unsigned char *rk_bytes,
+                                 const unsigned char *key)
+{
+    __m128i *rk = (__m128i *) rk_bytes;
+
+    memcpy(&rk[0], key, 16);
+    rk[1] = aesni_set_rk_128(rk[0], _mm_aeskeygenassist_si128(rk[0], 0x01));
+    rk[2] = aesni_set_rk_128(rk[1], _mm_aeskeygenassist_si128(rk[1], 0x02));
+    rk[3] = aesni_set_rk_128(rk[2], _mm_aeskeygenassist_si128(rk[2], 0x04));
+    rk[4] = aesni_set_rk_128(rk[3], _mm_aeskeygenassist_si128(rk[3], 0x08));
+    rk[5] = aesni_set_rk_128(rk[4], _mm_aeskeygenassist_si128(rk[4], 0x10));
+    rk[6] = aesni_set_rk_128(rk[5], _mm_aeskeygenassist_si128(rk[5], 0x20));
+    rk[7] = aesni_set_rk_128(rk[6], _mm_aeskeygenassist_si128(rk[6], 0x40));
+    rk[8] = aesni_set_rk_128(rk[7], _mm_aeskeygenassist_si128(rk[7], 0x80));
+    rk[9] = aesni_set_rk_128(rk[8], _mm_aeskeygenassist_si128(rk[8], 0x1B));
+    rk[10] = aesni_set_rk_128(rk[9], _mm_aeskeygenassist_si128(rk[9], 0x36));
+}
+
+/*
+ * Key expansion, 192-bit case
+ */
+static void aesni_set_rk_192(__m128i *xmm0, __m128i *xmm1, __m128i xmm2,
+                             unsigned char *rk)
+{
+    /*
+     * Finish generating the next 6 quarter-keys.
+     *
+     * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
+     * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
+     *
+     * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
+     * and those are written to the round key buffer.
+     */
+    xmm2 = _mm_shuffle_epi32(xmm2, 0x55);     // X:X:X:X
+    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3:X+r2:X+r1:X+r0
+    *xmm0 = _mm_slli_si128(*xmm0, 4);         // r2:r1:r0:0
+    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3+r2:X+r2+r1:X+r1+r0:X+r0
+    *xmm0 = _mm_slli_si128(*xmm0, 4);         // r1:r0:0:0
+    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3+r2+r1:X+r2+r1+r0:X+r1+r0:X+r0
+    *xmm0 = _mm_slli_si128(*xmm0, 4);         // r0:0:0:0
+    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3+r2+r1+r0:X+r2+r1+r0:X+r1+r0:X+r0
+    *xmm0 = xmm2;                             // = r9:r8:r7:r6
+
+    xmm2 = _mm_shuffle_epi32(xmm2, 0xff);     // r9:r9:r9:r9
+    xmm2 = _mm_xor_si128(xmm2, *xmm1);        // stuff:stuff:r9+r5:r9+r4
+    *xmm1 = _mm_slli_si128(*xmm1, 4);         // stuff:stuff:r4:0
+    xmm2 = _mm_xor_si128(xmm2, *xmm1);        // stuff:stuff:r9+r5+r4:r9+r4
+    *xmm1 = xmm2;                             // = stuff:stuff:r11:r10
+
+    /* Store xmm0 and the low half of xmm1 into rk, which is conceptually
+     * an array of 24-byte elements. Since 24 is not a multiple of 16,
+     * rk is not necessarily aligned so just `*rk = *xmm0` doesn't work. */
+    memcpy(rk, xmm0, 16);
+    _mm_storeu_si64(rk + 16, *xmm1);
+}
+
+static void aesni_setkey_enc_192(unsigned char *rk,
+                                 const unsigned char *key)
+{
+    /* First round: use original key */
+    memcpy(rk, key, 24);
+    /* aes.c guarantees that rk is aligned on a 16-byte boundary. */
+    __m128i xmm0 = ((__m128i *) rk)[0];
+    __m128i xmm1 = _mm_loadl_epi64(((__m128i *) rk) + 1);
+
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x01), rk + 24 * 1);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x02), rk + 24 * 2);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x04), rk + 24 * 3);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x08), rk + 24 * 4);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x10), rk + 24 * 5);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x20), rk + 24 * 6);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x40), rk + 24 * 7);
+    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x80), rk + 24 * 8);
+}
+
+/*
+ * Key expansion, 256-bit case
+ */
+static void aesni_set_rk_256(__m128i xmm0, __m128i xmm1, __m128i xmm2,
+                             __m128i *rk0, __m128i *rk1)
+{
+    /*
+     * Finish generating the next two round keys.
+     *
+     * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
+     * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
+     *
+     * On exit, *rk0 is r11:r10:r9:r8 and *rk1 is r15:r14:r13:r12
+     */
+    xmm2 = _mm_shuffle_epi32(xmm2, 0xff);
+    xmm2 ^= xmm0;
+    xmm0 = _mm_slli_si128(xmm0, 4);
+    xmm2 ^= xmm0;
+    xmm0 = _mm_slli_si128(xmm0, 4);
+    xmm2 ^= xmm0;
+    xmm0 = _mm_slli_si128(xmm0, 4);
+    xmm0 ^= xmm2;
+    *rk0 = xmm0;
+
+    /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
+     * and proceed to generate next round key from there */
+    xmm2 = _mm_aeskeygenassist_si128(xmm0, 0x00);
+    xmm2 = _mm_shuffle_epi32(xmm2, 0xaa);
+    xmm2 ^= xmm1;
+    xmm1 = _mm_slli_si128(xmm1, 4);
+    xmm2 ^= xmm1;
+    xmm1 = _mm_slli_si128(xmm1, 4);
+    xmm2 ^= xmm1;
+    xmm1 = _mm_slli_si128(xmm1, 4);
+    xmm1 ^= xmm2;
+    *rk1 = xmm1;
+}
+
+static void aesni_setkey_enc_256(unsigned char *rk_bytes,
+                                 const unsigned char *key)
+{
+    __m128i *rk = (__m128i *) rk_bytes;
+
+    memcpy(&rk[0], key, 16);
+    memcpy(&rk[1], key + 16, 16);
+
+    /*
+     * Main "loop" - Generating one more key than necessary,
+     * see definition of mbedtls_aes_context.buf
+     */
+    aesni_set_rk_256(rk[0], rk[1], _mm_aeskeygenassist_si128(rk[1], 0x01), &rk[2], &rk[3]);
+    aesni_set_rk_256(rk[2], rk[3], _mm_aeskeygenassist_si128(rk[3], 0x02), &rk[4], &rk[5]);
+    aesni_set_rk_256(rk[4], rk[5], _mm_aeskeygenassist_si128(rk[5], 0x04), &rk[6], &rk[7]);
+    aesni_set_rk_256(rk[6], rk[7], _mm_aeskeygenassist_si128(rk[7], 0x08), &rk[8], &rk[9]);
+    aesni_set_rk_256(rk[8], rk[9], _mm_aeskeygenassist_si128(rk[9], 0x10), &rk[10], &rk[11]);
+    aesni_set_rk_256(rk[10], rk[11], _mm_aeskeygenassist_si128(rk[11], 0x20), &rk[12], &rk[13]);
+    aesni_set_rk_256(rk[12], rk[13], _mm_aeskeygenassist_si128(rk[13], 0x40), &rk[14], &rk[15]);
+}
+
+#else  /* MBEDTLS_HAVE_AESNI_INTRINSICS */
+
 #if defined(__has_feature)
 #if __has_feature(memory_sanitizer)
 #warning \
@@ -438,6 +773,8 @@ static void aesni_setkey_enc_256(unsigned char *rk,
          : "memory", "cc", "0");
 }
 
+#endif  /* MBEDTLS_HAVE_AESNI_INTRINSICS */
+
 /*
  * Key expansion, wrapper
  */

From 02edb7546fc1bea36cece9738b51b6c78c1e6a94 Mon Sep 17 00:00:00 2001
From: Tom Cosgrove <tom.cosgrove@arm.com>
Date: Mon, 13 Mar 2023 15:32:52 +0000
Subject: [PATCH 32/53] Get aesni.c compiling with Visual Studio

Clang is nice enough to support bitwise operators on __m128i, but MSVC
isn't.

Also, __cpuid() in MSVC comes from <intrin.h> (which is included via
<emmintrin.h>), not <cpuid.h>.

Signed-off-by: Tom Cosgrove <tom.cosgrove@arm.com>
---
 library/aesni.c | 49 ++++++++++++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index 3acc329712..30b4b17ca1 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -33,7 +33,9 @@
 #if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) || defined(MBEDTLS_HAVE_X86_64)
 
 #if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#if !defined(_WIN32)
 #include <cpuid.h>
+#endif
 #include <immintrin.h>
 #endif
 
@@ -79,10 +81,11 @@ int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx,
 {
     const __m128i *rk = (const __m128i *) (ctx->buf + ctx->rk_offset);
     unsigned nr = ctx->nr; // Number of remaining rounds
+
     // Load round key 0
     __m128i xmm0;
     memcpy(&xmm0, input, 16);
-    xmm0 ^= *rk;
+    xmm0 = _mm_xor_si128(xmm0, rk[0]);  // xmm0 ^= *rk;
     ++rk;
     --nr;
 
@@ -122,12 +125,12 @@ static void gcm_clmul(const __m128i aa, const __m128i bb,
     *dd = _mm_clmulepi64_si128(aa, bb, 0x11); // a1*b1 = d1:d0
     __m128i ee = _mm_clmulepi64_si128(aa, bb, 0x10); // a0*b1 = e1:e0
     __m128i ff = _mm_clmulepi64_si128(aa, bb, 0x01); // a1*b0 = f1:f0
-    ff ^= ee;                                        // e1+f1:e0+f0
+    ff = _mm_xor_si128(ff, ee);                      // e1+f1:e0+f0
     ee = ff;                                         // e1+f1:e0+f0
     ff = _mm_srli_si128(ff, 8);                      // 0:e1+f1
     ee = _mm_slli_si128(ee, 8);                      // e0+f0:0
-    *dd ^= ff;                                       // d1:d0+e1+f1
-    *cc ^= ee;                                       // c1+e0+f1:c0
+    *dd = _mm_xor_si128(*dd, ff);                    // d1:d0+e1+f1
+    *cc = _mm_xor_si128(*cc, ee);                    // c1+e0+f1:c0
 }
 
 static void gcm_shift(__m128i *cc, __m128i *dd)
@@ -146,8 +149,8 @@ static void gcm_shift(__m128i *cc, __m128i *dd)
     xmm3 = _mm_slli_si128(xmm3, 8);          // r0>>63:0
     xmm4 = _mm_slli_si128(xmm4, 8);          // 0:r1>>63
 
-    *cc = xmm1 | xmm3;                       // r1<<1|r0>>63:r0<<1
-    *dd = xmm2 | xmm4 | xmm5;                // r3<<1|r2>>62:r2<<1|r1>>63
+    *cc = _mm_or_si128(xmm1, xmm3);          // r1<<1|r0>>63:r0<<1
+    *dd = _mm_or_si128(_mm_or_si128(xmm2, xmm4), xmm5); // r3<<1|r2>>62:r2<<1|r1>>63
 }
 
 static __m128i gcm_reduce1(__m128i xx)
@@ -157,8 +160,8 @@ static __m128i gcm_reduce1(__m128i xx)
     __m128i aa = _mm_slli_epi64(xx, 63);          // x1<<63:x0<<63 = stuff:a
     __m128i bb = _mm_slli_epi64(xx, 62);          // x1<<62:x0<<62 = stuff:b
     __m128i cc = _mm_slli_epi64(xx, 57);          // x1<<57:x0<<57 = stuff:c
-    __m128i dd = _mm_slli_si128(aa ^ bb ^ cc, 8); // a+b+c:0
-    return dd ^ xx;                               // x1+a+b+c:x0 = d:x0
+    __m128i dd = _mm_slli_si128(_mm_xor_si128(_mm_xor_si128(aa, bb), cc), 8); // a+b+c:0
+    return _mm_xor_si128(dd, xx);                 // x1+a+b+c:x0 = d:x0
 }
 
 static __m128i gcm_reduce2(__m128i dx)
@@ -173,9 +176,9 @@ static __m128i gcm_reduce2(__m128i dx)
     __m128i eh = _mm_slli_epi64(dx, 63);          // d<<63:stuff
     __m128i fh = _mm_slli_epi64(dx, 62);          // d<<62:stuff
     __m128i gh = _mm_slli_epi64(dx, 57);          // d<<57:stuff
-    __m128i hh = _mm_srli_si128(eh ^ fh ^ gh, 8); // 0:missing bits of d
+    __m128i hh = _mm_srli_si128(_mm_xor_si128(_mm_xor_si128(eh, fh), gh), 8); // 0:missing bits of d
 
-    return ee ^ ff ^ gg ^ hh ^ dx;
+    return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(_mm_xor_si128(ee, ff), gg), hh), dx);
 }
 
 void mbedtls_aesni_gcm_mult(unsigned char c[16],
@@ -199,7 +202,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
      */
     __m128i dx = gcm_reduce1(cc);
     __m128i xh = gcm_reduce2(dx);
-    cc = xh ^ dd; // x3+h1:x2+h0
+    cc = _mm_xor_si128(xh, dd); // x3+h1:x2+h0
 
     /* Now byte-reverse the outputs */
     for (size_t i = 0; i < 16; i++) {
@@ -241,13 +244,13 @@ static __m128i aesni_set_rk_128(__m128i xmm0, __m128i xmm1)
      * and this is returned, to be written to the round key buffer.
      */
     xmm1 = _mm_shuffle_epi32(xmm1, 0xff);   // X:X:X:X
-    xmm1 ^= xmm0;                           // X+r3:X+r2:X+r1:r4
+    xmm1 = _mm_xor_si128(xmm1, xmm0);       // X+r3:X+r2:X+r1:r4
     xmm0 = _mm_slli_si128(xmm0, 4);         // r2:r1:r0:0
-    xmm1 ^= xmm0;                           // X+r3+r2:X+r2+r1:r5:r4
+    xmm1 = _mm_xor_si128(xmm1, xmm0);       // X+r3+r2:X+r2+r1:r5:r4
     xmm0 = _mm_slli_si128(xmm0, 4);         // r1:r0:0:0
-    xmm1 ^= xmm0;                           // X+r3+r2+r1:r6:r5:r4
+    xmm1 = _mm_xor_si128(xmm1, xmm0);       // X+r3+r2+r1:r6:r5:r4
     xmm0 = _mm_slli_si128(xmm0, 4);         // r0:0:0:0
-    xmm1 ^= xmm0;                           // r7:r6:r5:r4
+    xmm1 = _mm_xor_si128(xmm1, xmm0);       // r7:r6:r5:r4
     return xmm1;
 }
 
@@ -341,26 +344,26 @@ static void aesni_set_rk_256(__m128i xmm0, __m128i xmm1, __m128i xmm2,
      * On exit, *rk0 is r11:r10:r9:r8 and *rk1 is r15:r14:r13:r12
      */
     xmm2 = _mm_shuffle_epi32(xmm2, 0xff);
-    xmm2 ^= xmm0;
+    xmm2 = _mm_xor_si128(xmm2, xmm0);
     xmm0 = _mm_slli_si128(xmm0, 4);
-    xmm2 ^= xmm0;
+    xmm2 = _mm_xor_si128(xmm2, xmm0);
     xmm0 = _mm_slli_si128(xmm0, 4);
-    xmm2 ^= xmm0;
+    xmm2 = _mm_xor_si128(xmm2, xmm0);
     xmm0 = _mm_slli_si128(xmm0, 4);
-    xmm0 ^= xmm2;
+    xmm0 = _mm_xor_si128(xmm0, xmm2);
     *rk0 = xmm0;
 
     /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
      * and proceed to generate next round key from there */
     xmm2 = _mm_aeskeygenassist_si128(xmm0, 0x00);
     xmm2 = _mm_shuffle_epi32(xmm2, 0xaa);
-    xmm2 ^= xmm1;
+    xmm2 = _mm_xor_si128(xmm2, xmm1);
     xmm1 = _mm_slli_si128(xmm1, 4);
-    xmm2 ^= xmm1;
+    xmm2 = _mm_xor_si128(xmm2, xmm1);
     xmm1 = _mm_slli_si128(xmm1, 4);
-    xmm2 ^= xmm1;
+    xmm2 = _mm_xor_si128(xmm2, xmm1);
     xmm1 = _mm_slli_si128(xmm1, 4);
-    xmm1 ^= xmm2;
+    xmm1 = _mm_xor_si128(xmm1, xmm2);
     *rk1 = xmm1;
 }
 

From dafeee48141787c8cf160763cdbdbb16eab897c5 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Wed, 15 Mar 2023 20:37:57 +0100
Subject: [PATCH 33/53] Improve variable names

To some extent anyway.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.c | 190 ++++++++++++++++++++++++------------------------
 1 file changed, 95 insertions(+), 95 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index 30b4b17ca1..c52e63c2f4 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -83,29 +83,29 @@ int mbedtls_aesni_crypt_ecb(mbedtls_aes_context *ctx,
     unsigned nr = ctx->nr; // Number of remaining rounds
 
     // Load round key 0
-    __m128i xmm0;
-    memcpy(&xmm0, input, 16);
-    xmm0 = _mm_xor_si128(xmm0, rk[0]);  // xmm0 ^= *rk;
+    __m128i state;
+    memcpy(&state, input, 16);
+    state = _mm_xor_si128(state, rk[0]);  // state ^= *rk;
     ++rk;
     --nr;
 
     if (mode == 0) {
         while (nr != 0) {
-            xmm0 = _mm_aesdec_si128(xmm0, *rk);
+            state = _mm_aesdec_si128(state, *rk);
             ++rk;
             --nr;
         }
-        xmm0 = _mm_aesdeclast_si128(xmm0, *rk);
+        state = _mm_aesdeclast_si128(state, *rk);
     } else {
         while (nr != 0) {
-            xmm0 = _mm_aesenc_si128(xmm0, *rk);
+            state = _mm_aesenc_si128(state, *rk);
             ++rk;
             --nr;
         }
-        xmm0 = _mm_aesenclast_si128(xmm0, *rk);
+        state = _mm_aesenclast_si128(state, *rk);
     }
 
-    memcpy(output, &xmm0, 16);
+    memcpy(output, &state, 16);
     return 0;
 }
 
@@ -135,25 +135,23 @@ static void gcm_clmul(const __m128i aa, const __m128i bb,
 
 static void gcm_shift(__m128i *cc, __m128i *dd)
 {
-    /*
-     * Now shift the result one bit to the left,
-     * taking advantage of [CLMUL-WP] eq 27 (p. 18)
-     */
-    //                                       // *cc = r1:r0
-    //                                       // *dd = r3:r2
-    __m128i xmm1 = _mm_slli_epi64(*cc, 1);   // r1<<1:r0<<1
-    __m128i xmm2 = _mm_slli_epi64(*dd, 1);   // r3<<1:r2<<1
-    __m128i xmm3 = _mm_srli_epi64(*cc, 63);  // r1>>63:r0>>63
-    __m128i xmm4 = _mm_srli_epi64(*dd, 63);  // r3>>63:r2>>63
-    __m128i xmm5 = _mm_srli_si128(xmm3, 8);  // 0:r1>>63
-    xmm3 = _mm_slli_si128(xmm3, 8);          // r0>>63:0
-    xmm4 = _mm_slli_si128(xmm4, 8);          // 0:r1>>63
+    /* [CMUCL-WP] Algorithm 5 Step 1: shift cc:dd one bit to the left,
+     * taking advantage of [CLMUL-WP] eq 27 (p. 18). */
+    //                                        // *cc = r1:r0
+    //                                        // *dd = r3:r2
+    __m128i cc_lo = _mm_slli_epi64(*cc, 1);   // r1<<1:r0<<1
+    __m128i dd_lo = _mm_slli_epi64(*dd, 1);   // r3<<1:r2<<1
+    __m128i cc_hi = _mm_srli_epi64(*cc, 63);  // r1>>63:r0>>63
+    __m128i dd_hi = _mm_srli_epi64(*dd, 63);  // r3>>63:r2>>63
+    __m128i xmm5 = _mm_srli_si128(cc_hi, 8);  // 0:r1>>63
+    cc_hi = _mm_slli_si128(cc_hi, 8);         // r0>>63:0
+    dd_hi = _mm_slli_si128(dd_hi, 8);         // 0:r1>>63
 
-    *cc = _mm_or_si128(xmm1, xmm3);          // r1<<1|r0>>63:r0<<1
-    *dd = _mm_or_si128(_mm_or_si128(xmm2, xmm4), xmm5); // r3<<1|r2>>62:r2<<1|r1>>63
+    *cc = _mm_or_si128(cc_lo, cc_hi);         // r1<<1|r0>>63:r0<<1
+    *dd = _mm_or_si128(_mm_or_si128(dd_lo, dd_hi), xmm5); // r3<<1|r2>>62:r2<<1|r1>>63
 }
 
-static __m128i gcm_reduce1(__m128i xx)
+static __m128i gcm_reduce(__m128i xx)
 {
     //                                            // xx = x1:x0
     /* [CLMUL-WP] Algorithm 5 Step 2 */
@@ -164,7 +162,7 @@ static __m128i gcm_reduce1(__m128i xx)
     return _mm_xor_si128(dd, xx);                 // x1+a+b+c:x0 = d:x0
 }
 
-static __m128i gcm_reduce2(__m128i dx)
+static __m128i gcm_mix(__m128i dx)
 {
     /* [CLMUL-WP] Algorithm 5 Steps 3 and 4 */
     __m128i ee = _mm_srli_epi64(dx, 1);           // e1:x0>>1 = e1:e0'
@@ -200,8 +198,8 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
      * using [CLMUL-WP] algorithm 5 (p. 18).
      * Currently dd:cc holds x3:x2:x1:x0 (already shifted).
      */
-    __m128i dx = gcm_reduce1(cc);
-    __m128i xh = gcm_reduce2(dx);
+    __m128i dx = gcm_reduce(cc);
+    __m128i xh = gcm_mix(dx);
     cc = _mm_xor_si128(xh, dd); // x3+h1:x2+h0
 
     /* Now byte-reverse the outputs */
@@ -231,27 +229,27 @@ void mbedtls_aesni_inverse_key(unsigned char *invkey,
 /*
  * Key expansion, 128-bit case
  */
-static __m128i aesni_set_rk_128(__m128i xmm0, __m128i xmm1)
+static __m128i aesni_set_rk_128(__m128i state, __m128i xword)
 {
     /*
      * Finish generating the next round key.
      *
-     * On entry xmm0 is r3:r2:r1:r0 and xmm1 is X:stuff:stuff:stuff
-     * with X = rot( sub( r3 ) ) ^ RCON.
+     * On entry state is r3:r2:r1:r0 and xword is X:stuff:stuff:stuff
+     * with X = rot( sub( r3 ) ) ^ RCON (obtained with AESKEYGENASSIST).
      *
-     * On exit, xmm1 is r7:r6:r5:r4
+     * On exit, xword is r7:r6:r5:r4
      * with r4 = X + r0, r5 = r4 + r1, r6 = r5 + r2, r7 = r6 + r3
      * and this is returned, to be written to the round key buffer.
      */
-    xmm1 = _mm_shuffle_epi32(xmm1, 0xff);   // X:X:X:X
-    xmm1 = _mm_xor_si128(xmm1, xmm0);       // X+r3:X+r2:X+r1:r4
-    xmm0 = _mm_slli_si128(xmm0, 4);         // r2:r1:r0:0
-    xmm1 = _mm_xor_si128(xmm1, xmm0);       // X+r3+r2:X+r2+r1:r5:r4
-    xmm0 = _mm_slli_si128(xmm0, 4);         // r1:r0:0:0
-    xmm1 = _mm_xor_si128(xmm1, xmm0);       // X+r3+r2+r1:r6:r5:r4
-    xmm0 = _mm_slli_si128(xmm0, 4);         // r0:0:0:0
-    xmm1 = _mm_xor_si128(xmm1, xmm0);       // r7:r6:r5:r4
-    return xmm1;
+    xword = _mm_shuffle_epi32(xword, 0xff);   // X:X:X:X
+    xword = _mm_xor_si128(xword, state);      // X+r3:X+r2:X+r1:r4
+    state = _mm_slli_si128(state, 4);         // r2:r1:r0:0
+    xword = _mm_xor_si128(xword, state);      // X+r3+r2:X+r2+r1:r5:r4
+    state = _mm_slli_si128(state, 4);         // r1:r0:0:0
+    xword = _mm_xor_si128(xword, state);      // X+r3+r2+r1:r6:r5:r4
+    state = _mm_slli_si128(state, 4);         // r0:0:0:0
+    state = _mm_xor_si128(xword, state);      // r7:r6:r5:r4
+    return state;
 }
 
 static void aesni_setkey_enc_128(unsigned char *rk_bytes,
@@ -275,39 +273,40 @@ static void aesni_setkey_enc_128(unsigned char *rk_bytes,
 /*
  * Key expansion, 192-bit case
  */
-static void aesni_set_rk_192(__m128i *xmm0, __m128i *xmm1, __m128i xmm2,
+static void aesni_set_rk_192(__m128i *state0, __m128i *state1, __m128i xword,
                              unsigned char *rk)
 {
     /*
      * Finish generating the next 6 quarter-keys.
      *
-     * On entry xmm0 is r3:r2:r1:r0, xmm1 is stuff:stuff:r5:r4
-     * and xmm2 is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON.
+     * On entry state0 is r3:r2:r1:r0, state1 is stuff:stuff:r5:r4
+     * and xword is stuff:stuff:X:stuff with X = rot( sub( r3 ) ) ^ RCON
+     * (obtained with AESKEYGENASSIST).
      *
-     * On exit, xmm0 is r9:r8:r7:r6 and xmm1 is stuff:stuff:r11:r10
+     * On exit, state0 is r9:r8:r7:r6 and state1 is stuff:stuff:r11:r10
      * and those are written to the round key buffer.
      */
-    xmm2 = _mm_shuffle_epi32(xmm2, 0x55);     // X:X:X:X
-    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3:X+r2:X+r1:X+r0
-    *xmm0 = _mm_slli_si128(*xmm0, 4);         // r2:r1:r0:0
-    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3+r2:X+r2+r1:X+r1+r0:X+r0
-    *xmm0 = _mm_slli_si128(*xmm0, 4);         // r1:r0:0:0
-    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3+r2+r1:X+r2+r1+r0:X+r1+r0:X+r0
-    *xmm0 = _mm_slli_si128(*xmm0, 4);         // r0:0:0:0
-    xmm2 = _mm_xor_si128(xmm2, *xmm0);        // X+r3+r2+r1+r0:X+r2+r1+r0:X+r1+r0:X+r0
-    *xmm0 = xmm2;                             // = r9:r8:r7:r6
+    xword = _mm_shuffle_epi32(xword, 0x55);   // X:X:X:X
+    xword = _mm_xor_si128(xword, *state0);    // X+r3:X+r2:X+r1:X+r0
+    *state0 = _mm_slli_si128(*state0, 4);     // r2:r1:r0:0
+    xword = _mm_xor_si128(xword, *state0);    // X+r3+r2:X+r2+r1:X+r1+r0:X+r0
+    *state0 = _mm_slli_si128(*state0, 4);     // r1:r0:0:0
+    xword = _mm_xor_si128(xword, *state0);    // X+r3+r2+r1:X+r2+r1+r0:X+r1+r0:X+r0
+    *state0 = _mm_slli_si128(*state0, 4);     // r0:0:0:0
+    xword = _mm_xor_si128(xword, *state0);    // X+r3+r2+r1+r0:X+r2+r1+r0:X+r1+r0:X+r0
+    *state0 = xword;                          // = r9:r8:r7:r6
 
-    xmm2 = _mm_shuffle_epi32(xmm2, 0xff);     // r9:r9:r9:r9
-    xmm2 = _mm_xor_si128(xmm2, *xmm1);        // stuff:stuff:r9+r5:r9+r4
-    *xmm1 = _mm_slli_si128(*xmm1, 4);         // stuff:stuff:r4:0
-    xmm2 = _mm_xor_si128(xmm2, *xmm1);        // stuff:stuff:r9+r5+r4:r9+r4
-    *xmm1 = xmm2;                             // = stuff:stuff:r11:r10
+    xword = _mm_shuffle_epi32(xword, 0xff);   // r9:r9:r9:r9
+    xword = _mm_xor_si128(xword, *state1);    // stuff:stuff:r9+r5:r9+r4
+    *state1 = _mm_slli_si128(*state1, 4);     // stuff:stuff:r4:0
+    xword = _mm_xor_si128(xword, *state1);    // stuff:stuff:r9+r5+r4:r9+r4
+    *state1 = xword;                          // = stuff:stuff:r11:r10
 
-    /* Store xmm0 and the low half of xmm1 into rk, which is conceptually
+    /* Store state0 and the low half of state1 into rk, which is conceptually
      * an array of 24-byte elements. Since 24 is not a multiple of 16,
-     * rk is not necessarily aligned so just `*rk = *xmm0` doesn't work. */
-    memcpy(rk, xmm0, 16);
-    _mm_storeu_si64(rk + 16, *xmm1);
+     * rk is not necessarily aligned so just `*rk = *state0` doesn't work. */
+    memcpy(rk, state0, 16);
+    _mm_storeu_si64(rk + 16, *state1);
 }
 
 static void aesni_setkey_enc_192(unsigned char *rk,
@@ -316,55 +315,56 @@ static void aesni_setkey_enc_192(unsigned char *rk,
     /* First round: use original key */
     memcpy(rk, key, 24);
     /* aes.c guarantees that rk is aligned on a 16-byte boundary. */
-    __m128i xmm0 = ((__m128i *) rk)[0];
-    __m128i xmm1 = _mm_loadl_epi64(((__m128i *) rk) + 1);
+    __m128i state0 = ((__m128i *) rk)[0];
+    __m128i state1 = _mm_loadl_epi64(((__m128i *) rk) + 1);
 
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x01), rk + 24 * 1);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x02), rk + 24 * 2);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x04), rk + 24 * 3);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x08), rk + 24 * 4);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x10), rk + 24 * 5);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x20), rk + 24 * 6);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x40), rk + 24 * 7);
-    aesni_set_rk_192(&xmm0, &xmm1, _mm_aeskeygenassist_si128(xmm1, 0x80), rk + 24 * 8);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x01), rk + 24 * 1);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x02), rk + 24 * 2);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x04), rk + 24 * 3);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x08), rk + 24 * 4);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x10), rk + 24 * 5);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x20), rk + 24 * 6);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x40), rk + 24 * 7);
+    aesni_set_rk_192(&state0, &state1, _mm_aeskeygenassist_si128(state1, 0x80), rk + 24 * 8);
 }
 
 /*
  * Key expansion, 256-bit case
  */
-static void aesni_set_rk_256(__m128i xmm0, __m128i xmm1, __m128i xmm2,
+static void aesni_set_rk_256(__m128i state0, __m128i state1, __m128i xword,
                              __m128i *rk0, __m128i *rk1)
 {
     /*
      * Finish generating the next two round keys.
      *
-     * On entry xmm0 is r3:r2:r1:r0, xmm1 is r7:r6:r5:r4 and
-     * xmm2 is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
+     * On entry state0 is r3:r2:r1:r0, state1 is r7:r6:r5:r4 and
+     * xword is X:stuff:stuff:stuff with X = rot( sub( r7 )) ^ RCON
+     * (obtained with AESKEYGENASSIST).
      *
      * On exit, *rk0 is r11:r10:r9:r8 and *rk1 is r15:r14:r13:r12
      */
-    xmm2 = _mm_shuffle_epi32(xmm2, 0xff);
-    xmm2 = _mm_xor_si128(xmm2, xmm0);
-    xmm0 = _mm_slli_si128(xmm0, 4);
-    xmm2 = _mm_xor_si128(xmm2, xmm0);
-    xmm0 = _mm_slli_si128(xmm0, 4);
-    xmm2 = _mm_xor_si128(xmm2, xmm0);
-    xmm0 = _mm_slli_si128(xmm0, 4);
-    xmm0 = _mm_xor_si128(xmm0, xmm2);
-    *rk0 = xmm0;
+    xword = _mm_shuffle_epi32(xword, 0xff);
+    xword = _mm_xor_si128(xword, state0);
+    state0 = _mm_slli_si128(state0, 4);
+    xword = _mm_xor_si128(xword, state0);
+    state0 = _mm_slli_si128(state0, 4);
+    xword = _mm_xor_si128(xword, state0);
+    state0 = _mm_slli_si128(state0, 4);
+    state0 = _mm_xor_si128(state0, xword);
+    *rk0 = state0;
 
-    /* Set xmm2 to stuff:Y:stuff:stuff with Y = subword( r11 )
+    /* Set xword to stuff:Y:stuff:stuff with Y = subword( r11 )
      * and proceed to generate next round key from there */
-    xmm2 = _mm_aeskeygenassist_si128(xmm0, 0x00);
-    xmm2 = _mm_shuffle_epi32(xmm2, 0xaa);
-    xmm2 = _mm_xor_si128(xmm2, xmm1);
-    xmm1 = _mm_slli_si128(xmm1, 4);
-    xmm2 = _mm_xor_si128(xmm2, xmm1);
-    xmm1 = _mm_slli_si128(xmm1, 4);
-    xmm2 = _mm_xor_si128(xmm2, xmm1);
-    xmm1 = _mm_slli_si128(xmm1, 4);
-    xmm1 = _mm_xor_si128(xmm1, xmm2);
-    *rk1 = xmm1;
+    xword = _mm_aeskeygenassist_si128(state0, 0x00);
+    xword = _mm_shuffle_epi32(xword, 0xaa);
+    xword = _mm_xor_si128(xword, state1);
+    state1 = _mm_slli_si128(state1, 4);
+    xword = _mm_xor_si128(xword, state1);
+    state1 = _mm_slli_si128(state1, 4);
+    xword = _mm_xor_si128(xword, state1);
+    state1 = _mm_slli_si128(state1, 4);
+    state1 = _mm_xor_si128(state1, xword);
+    *rk1 = state1;
 }
 
 static void aesni_setkey_enc_256(unsigned char *rk_bytes,

From dde3c6532ef74a76a0837c22eb0df7e63d5858de Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Wed, 15 Mar 2023 23:16:27 +0100
Subject: [PATCH 34/53] Fix MSVC portability

MSVC doesn't have _mm_storeu_si64. Fortunately it isn't really needed here.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/aesni.c b/library/aesni.c
index c52e63c2f4..e6143ee5ce 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -306,7 +306,7 @@ static void aesni_set_rk_192(__m128i *state0, __m128i *state1, __m128i xword,
      * an array of 24-byte elements. Since 24 is not a multiple of 16,
      * rk is not necessarily aligned so just `*rk = *state0` doesn't work. */
     memcpy(rk, state0, 16);
-    _mm_storeu_si64(rk + 16, *state1);
+    memcpy(rk + 16, state1, 8);
 }
 
 static void aesni_setkey_enc_192(unsigned char *rk,

From 215517667fad69309ede6da4566b35af79fec7c4 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Wed, 15 Mar 2023 23:20:26 +0100
Subject: [PATCH 35/53] Travis: run selftest on Windows

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index cdb79d1aa2..ace1e5c060 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -79,6 +79,7 @@ jobs:
         # Logs appear out of sequence on Windows. Give time to catch up.
         - sleep 5
         - scripts/windows_msbuild.bat v141 # Visual Studio 2017
+        - visualc/VS2013/x64/Release/selftest.exe
 
     - name: full configuration on arm64
       os: linux

From 0cd9ab71075947e65d41370b62d3c5536ceaa331 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 13:06:14 +0100
Subject: [PATCH 36/53] Fix code style

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/gcm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/library/gcm.c b/library/gcm.c
index 4e8ec08815..968e7175fb 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -845,8 +845,7 @@ int mbedtls_gcm_self_test(int verbose)
     mbedtls_cipher_id_t cipher = MBEDTLS_CIPHER_ID_AES;
     size_t olen;
 
-    if (verbose != 0)
-    {
+    if (verbose != 0) {
 #if defined(MBEDTLS_GCM_ALT)
         mbedtls_printf("  GCM note: alternative implementation.\n");
 #else /* MBEDTLS_GCM_ALT */

From d0185f78c0bd5d23064a8dabe9b2d9c0f7c6805f Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 13:08:18 +0100
Subject: [PATCH 37/53] Fix typo in comment

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/aesni.c b/library/aesni.c
index e6143ee5ce..e24527c826 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -130,7 +130,7 @@ static void gcm_clmul(const __m128i aa, const __m128i bb,
     ff = _mm_srli_si128(ff, 8);                      // 0:e1+f1
     ee = _mm_slli_si128(ee, 8);                      // e0+f0:0
     *dd = _mm_xor_si128(*dd, ff);                    // d1:d0+e1+f1
-    *cc = _mm_xor_si128(*cc, ee);                    // c1+e0+f1:c0
+    *cc = _mm_xor_si128(*cc, ee);                    // c1+e0+f0:c0
 }
 
 static void gcm_shift(__m128i *cc, __m128i *dd)

From 148cad134a740f5f72f4c7376ae37bc5befeab28 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 13:08:42 +0100
Subject: [PATCH 38/53] Fix unaligned access if the context is moved during
 operation

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index 69d4eadfa8..1f3d8ad91d 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -962,6 +962,35 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx,
 }
 #endif /* !MBEDTLS_AES_DECRYPT_ALT */
 
+#if defined(MBEDTLS_AESNI_HAVE_CODE) || \
+    (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86))
+/* VIA Padlock and our intrinsics-based implementation of AESNI require
+ * the round keys to be aligned on a 16-byte boundary. We take care of this
+ * before creating them, but the AES context may have moved (this can happen
+ * if the library is called from a language with managed memory), and in later
+ * calls it might have a different alignment with respect to 16-byte memory.
+ * So we may need to realign.
+ */
+static void aes_maybe_realign(mbedtls_aes_context *ctx)
+{
+    /* We want a 16-byte alignment. Note that buf is a pointer to uint32_t
+     * and rk_offset is in units of uint32_t words = 4 bytes. We want a
+     * 4-word alignment. */
+    uintptr_t current_address = (uintptr_t) (ctx->buf + ctx->rk_offset);
+    unsigned current_alignment = (current_address & 0x0000000f) / 4;
+    if (current_alignment != 0) {
+        unsigned new_offset = ctx->rk_offset + 4 - current_alignment;
+        if (new_offset >= 4) {
+            new_offset -= 4;
+        }
+        memmove(ctx->buf + new_offset,     // new address
+                ctx->buf + ctx->rk_offset, // current address
+                (ctx->nr + 1) * 16);       // number of round keys * bytes per rk
+        ctx->rk_offset = new_offset;
+    }
+}
+#endif
+
 /*
  * AES-ECB block encryption/decryption
  */
@@ -976,6 +1005,7 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        aes_maybe_realign(ctx);
         return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
     }
 #endif
@@ -988,13 +1018,8 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace > 0) {
-        if (mbedtls_padlock_xcryptecb(ctx, mode, input, output) == 0) {
-            return 0;
-        }
-
-        // If padlock data misaligned, we just fall back to
-        // unaccelerated mode
-        //
+        aes_maybe_realign(ctx);
+        return mbedtls_padlock_xcryptecb(ctx, mode, input, output);
     }
 #endif
 

From d50cfddfd7a089528983c95afbc4b01b81832de0 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 14:25:58 +0100
Subject: [PATCH 39/53] AES context copy test: clean up

Don't use hexcmp to compare binary data. Improve readability.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 tests/suites/test_suite_aes.function | 30 +++++++++++++++++-----------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function
index d95503ad8c..332b9a7948 100644
--- a/tests/suites/test_suite_aes.function
+++ b/tests/suites/test_suite_aes.function
@@ -468,32 +468,38 @@ void aes_misc_params()
 /* END_CASE */
 
 /* BEGIN_CASE */
-void aes_ecb_copy_context(data_t *key_str, data_t *src_str)
+void aes_ecb_copy_context(data_t *key, data_t *src)
 {
     unsigned char output1[16], output2[16], plain[16];
     mbedtls_aes_context ctx1, ctx2, ctx3;
 
+    TEST_EQUAL(src->len, 16);
+
     // Set key and encrypt with original context
     mbedtls_aes_init(&ctx1);
-    TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx1, key_str->x,
-                                       key_str->len * 8) == 0);
+    TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx1, key->x,
+                                       key->len * 8) == 0);
     TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx1, MBEDTLS_AES_ENCRYPT,
-                                      src_str->x, output1) == 0);
-
+                                      src->x, output1) == 0);
     ctx2 = ctx1;
-    TEST_ASSERT(mbedtls_aes_setkey_dec(&ctx1, key_str->x,
-                                       key_str->len * 8) == 0);
+
+    // Set key for decryption with original context
+    TEST_ASSERT(mbedtls_aes_setkey_dec(&ctx1, key->x,
+                                       key->len * 8) == 0);
     ctx3 = ctx1;
+
+    // Wipe the original context to make sure nothing from it is used
     memset(&ctx1, 0, sizeof(ctx1));
 
-    // Encrypt and decrypt with copied context
+    // Encrypt with copied context
     TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx2, MBEDTLS_AES_ENCRYPT,
-                                      src_str->x, output2) == 0);
+                                      src->x, output2) == 0);
+    ASSERT_COMPARE(output1, 16, output2, 16);
+
+    // Decrypt with copied context
     TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx3, MBEDTLS_AES_DECRYPT,
                                       output1, plain) == 0);
-
-    TEST_ASSERT(mbedtls_test_hexcmp(output1, output2, 16, 16) == 0);
-    TEST_ASSERT(mbedtls_test_hexcmp(src_str->x, plain, src_str->len, 16) == 0);
+    ASSERT_COMPARE(src->x, 16, plain, 16);
 }
 /* END_CASE */
 

From f99ec202d7dc5c3ba4ee73dc56b6862c97133634 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 14:26:47 +0100
Subject: [PATCH 40/53] AES context copy test: have one for each key size

Don't use all-bytes zero as a string, it's harder to debug.

This commit uses the test vectors from FIPS 197 appendix C.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 tests/suites/test_suite_aes.ecb.data | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/suites/test_suite_aes.ecb.data b/tests/suites/test_suite_aes.ecb.data
index b468ac30b7..d1c88ac914 100644
--- a/tests/suites/test_suite_aes.ecb.data
+++ b/tests/suites/test_suite_aes.ecb.data
@@ -229,5 +229,11 @@ aes_decrypt_ecb:"000000000000000000000000000000000000000000000000000000000000000
 AES-256-ECB Decrypt NIST KAT #12
 aes_decrypt_ecb:"0000000000000000000000000000000000000000000000000000000000000000":"9b80eefb7ebe2d2b16247aa0efc72f5d":"e0000000000000000000000000000000":0
 
-AES-256-ECB Copy Context NIST KAT #1
-aes_ecb_copy_context:"c1cc358b449909a19436cfbb3f852ef8bcb5ed12ac7058325f56e6099aab1a1c":"00000000000000000000000000000000"
+AES-128-ECB Copy context
+aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f":"00112233445566778899aabbccddeeff"
+
+AES-192-ECB Copy context
+aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f1011121314151617":"00112233445566778899aabbccddeeff"
+
+AES-256-ECB Copy context
+aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f":"00112233445566778899aabbccddeeff"

From 5fcdf49f0e0a36342482e378f0d84cf35bc96ea3 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 14:38:29 +0100
Subject: [PATCH 41/53] Move copy-context testing to an auxiliary function

This is in preparation for running it multiple times with different
alignments.

This commit also fixes the fact that we weren't calling mbedtls_aes_free()
on the context (we still aren't if the test fails). It's not an issue except
possibly in some ALT implementations.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 tests/suites/test_suite_aes.ecb.data |  6 +-
 tests/suites/test_suite_aes.function | 90 ++++++++++++++++++----------
 2 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/tests/suites/test_suite_aes.ecb.data b/tests/suites/test_suite_aes.ecb.data
index d1c88ac914..93858656f5 100644
--- a/tests/suites/test_suite_aes.ecb.data
+++ b/tests/suites/test_suite_aes.ecb.data
@@ -230,10 +230,10 @@ AES-256-ECB Decrypt NIST KAT #12
 aes_decrypt_ecb:"0000000000000000000000000000000000000000000000000000000000000000":"9b80eefb7ebe2d2b16247aa0efc72f5d":"e0000000000000000000000000000000":0
 
 AES-128-ECB Copy context
-aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f":"00112233445566778899aabbccddeeff"
+aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f"
 
 AES-192-ECB Copy context
-aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f1011121314151617":"00112233445566778899aabbccddeeff"
+aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f1011121314151617"
 
 AES-256-ECB Copy context
-aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f":"00112233445566778899aabbccddeeff"
+aes_ecb_copy_context:"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f"
diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function
index 332b9a7948..51a4d4dbab 100644
--- a/tests/suites/test_suite_aes.function
+++ b/tests/suites/test_suite_aes.function
@@ -1,5 +1,61 @@
 /* BEGIN_HEADER */
 #include "mbedtls/aes.h"
+
+/* Test AES with a copied context.
+ *
+ * master, enc and dec must be AES context objects. They don't need to
+ * be initialized, and are left freed.
+ */
+static int test_copy(const data_t *key,
+                     mbedtls_aes_context *master,
+                     mbedtls_aes_context *enc,
+                     mbedtls_aes_context *dec)
+{
+    unsigned char plaintext[16] = {
+        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    };
+    unsigned char ciphertext[16];
+    unsigned char output[16];
+
+    // Set key and encrypt with original context
+    mbedtls_aes_init(master);
+    TEST_ASSERT(mbedtls_aes_setkey_enc(master, key->x,
+                                       key->len * 8) == 0);
+    TEST_ASSERT(mbedtls_aes_crypt_ecb(master, MBEDTLS_AES_ENCRYPT,
+                                      plaintext, ciphertext) == 0);
+    *enc = *master;
+
+    // Set key for decryption with original context
+    mbedtls_aes_init(master);
+    TEST_ASSERT(mbedtls_aes_setkey_dec(master, key->x,
+                                       key->len * 8) == 0);
+    *dec = *master;
+
+    // Wipe the original context to make sure nothing from it is used
+    memset(master, 0, sizeof(*master));
+
+    // Encrypt with copied context
+    TEST_ASSERT(mbedtls_aes_crypt_ecb(enc, MBEDTLS_AES_ENCRYPT,
+                                      plaintext, output) == 0);
+    ASSERT_COMPARE(ciphertext, 16, output, 16);
+    mbedtls_aes_free(enc);
+
+    // Decrypt with copied context
+    TEST_ASSERT(mbedtls_aes_crypt_ecb(dec, MBEDTLS_AES_DECRYPT,
+                                      ciphertext, output) == 0);
+    ASSERT_COMPARE(plaintext, 16, output, 16);
+    mbedtls_aes_free(dec);
+
+    return 1;
+
+exit:
+    /* Bug: we may be leaving something unfreed. This is harmless
+     * in our built-in implementations, but might cause a memory leak
+     * with alternative implementations. */
+    return 0;
+}
+
 /* END_HEADER */
 
 /* BEGIN_DEPENDENCIES
@@ -468,38 +524,12 @@ void aes_misc_params()
 /* END_CASE */
 
 /* BEGIN_CASE */
-void aes_ecb_copy_context(data_t *key, data_t *src)
+void aes_ecb_copy_context(data_t *key)
 {
-    unsigned char output1[16], output2[16], plain[16];
     mbedtls_aes_context ctx1, ctx2, ctx3;
-
-    TEST_EQUAL(src->len, 16);
-
-    // Set key and encrypt with original context
-    mbedtls_aes_init(&ctx1);
-    TEST_ASSERT(mbedtls_aes_setkey_enc(&ctx1, key->x,
-                                       key->len * 8) == 0);
-    TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx1, MBEDTLS_AES_ENCRYPT,
-                                      src->x, output1) == 0);
-    ctx2 = ctx1;
-
-    // Set key for decryption with original context
-    TEST_ASSERT(mbedtls_aes_setkey_dec(&ctx1, key->x,
-                                       key->len * 8) == 0);
-    ctx3 = ctx1;
-
-    // Wipe the original context to make sure nothing from it is used
-    memset(&ctx1, 0, sizeof(ctx1));
-
-    // Encrypt with copied context
-    TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx2, MBEDTLS_AES_ENCRYPT,
-                                      src->x, output2) == 0);
-    ASSERT_COMPARE(output1, 16, output2, 16);
-
-    // Decrypt with copied context
-    TEST_ASSERT(mbedtls_aes_crypt_ecb(&ctx3, MBEDTLS_AES_DECRYPT,
-                                      output1, plain) == 0);
-    ASSERT_COMPARE(src->x, 16, plain, 16);
+    if (!test_copy(key, &ctx1, &ctx2, &ctx3)) {
+        goto exit;
+    }
 }
 /* END_CASE */
 

From 844f65dc65513312c5313f8d4f7dcd63408e5a3f Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 14:54:48 +0100
Subject: [PATCH 42/53] Explicitly test AES contexts with different alignments

Don't leave it up to chance.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 tests/suites/test_suite_aes.function | 78 +++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 2 deletions(-)

diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function
index 51a4d4dbab..a535086bb6 100644
--- a/tests/suites/test_suite_aes.function
+++ b/tests/suites/test_suite_aes.function
@@ -526,10 +526,84 @@ void aes_misc_params()
 /* BEGIN_CASE */
 void aes_ecb_copy_context(data_t *key)
 {
-    mbedtls_aes_context ctx1, ctx2, ctx3;
-    if (!test_copy(key, &ctx1, &ctx2, &ctx3)) {
+    /* We test context copying multiple times, with different alignments
+     * of the original and of the copies. */
+
+    void *src = NULL; // Memory block containing the original context
+    void *enc = NULL; // Memory block containing the copy doing encryption
+    void *dec = NULL; // Memory block containing the copy doing decryption
+
+    struct align1 {
+        char bump;
+        mbedtls_aes_context ctx;
+    };
+
+    /* All peak alignment */
+    ASSERT_ALLOC(src, sizeof(mbedtls_aes_context));
+    ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context));
+    ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context));
+    if (!test_copy(key, src, enc, dec)) {
         goto exit;
     }
+    mbedtls_free(src);
+    src = NULL;
+    mbedtls_free(enc);
+    enc = NULL;
+    mbedtls_free(dec);
+    dec = NULL;
+
+    /* Original shifted */
+    ASSERT_ALLOC(src, sizeof(struct align1));
+    ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context));
+    ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context));
+    if (!test_copy(key, &((struct align1 *) src)->ctx, enc, dec)) {
+        goto exit;
+    }
+    mbedtls_free(src);
+    src = NULL;
+    mbedtls_free(enc);
+    enc = NULL;
+    mbedtls_free(dec);
+    dec = NULL;
+
+    /* Copies shifted */
+    ASSERT_ALLOC(src, sizeof(mbedtls_aes_context));
+    ASSERT_ALLOC(enc, sizeof(struct align1));
+    ASSERT_ALLOC(dec, sizeof(struct align1));
+    if (!test_copy(key,
+                   src,
+                   &((struct align1 *) enc)->ctx,
+                   &((struct align1 *) dec)->ctx)) {
+        goto exit;
+    }
+    mbedtls_free(src);
+    src = NULL;
+    mbedtls_free(enc);
+    enc = NULL;
+    mbedtls_free(dec);
+    dec = NULL;
+
+    /* Source and copies shifted */
+    ASSERT_ALLOC(src, sizeof(struct align1));
+    ASSERT_ALLOC(enc, sizeof(struct align1));
+    ASSERT_ALLOC(dec, sizeof(struct align1));
+    if (!test_copy(key,
+                   &((struct align1 *) src)->ctx,
+                   &((struct align1 *) enc)->ctx,
+                   &((struct align1 *) dec)->ctx)) {
+        goto exit;
+    }
+    mbedtls_free(src);
+    src = NULL;
+    mbedtls_free(enc);
+    enc = NULL;
+    mbedtls_free(dec);
+    dec = NULL;
+
+exit:
+    mbedtls_free(src);
+    mbedtls_free(enc);
+    mbedtls_free(dec);
 }
 /* END_CASE */
 

From 0f454e4642c9309f2de66fe9f9911e42c4bad822 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 14:58:46 +0100
Subject: [PATCH 43/53] Use consistent guards for padlock code

The padlock feature is enabled if
```
defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
```
with the second macro coming from `padlock.h`. The availability of the
macro `MBEDTLS_PADLOCK_ALIGN16` is coincidentally equivalent to
`MBEDTLS_HAVE_X86` but this is not meaningful.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index 1f3d8ad91d..b4b34232d1 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -47,8 +47,7 @@
 
 #if !defined(MBEDTLS_AES_ALT)
 
-#if defined(MBEDTLS_PADLOCK_C) &&                      \
-    (defined(MBEDTLS_HAVE_X86) || defined(MBEDTLS_PADLOCK_ALIGN16))
+#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
 static int aes_padlock_ace = -1;
 #endif
 
@@ -530,7 +529,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
 #endif
 
     ctx->rk_offset = 0;
-#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_PADLOCK_ALIGN16)
+#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace == -1) {
         aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
     }
@@ -642,7 +641,7 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key,
     mbedtls_aes_init(&cty);
 
     ctx->rk_offset = 0;
-#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_PADLOCK_ALIGN16)
+#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace == -1) {
         aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
     }

From 04fa1a4054c17489332d9eaadd1c3af45f1903ab Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Thu, 16 Mar 2023 15:00:03 +0000
Subject: [PATCH 44/53] Threat Model: fix copy paste

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 SECURITY.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 8d3678a5ee..61e39361af 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -94,10 +94,9 @@ application's threat model, they need to be mitigated by the platform.
 
 ### Physical attacks
 
-In this section, we consider an attacker who can attacker has access to
-physical information about the hardware Mbed TLS is running on and/or can alter
-the physical state of the hardware (e.g. power analysis, radio emissions or
-fault injection).
+In this section, we consider an attacker who has access to physical information
+about the hardware Mbed TLS is running on and/or can alter the physical state
+of the hardware (e.g. power analysis, radio emissions or fault injection).
 
 Mbed TLS doesn't make any security guarantees against physical attacks. If
 physical attacks are present in a use case or a user application's threat

From dd6021caf1d31ee17219813b98bd80b9998fdb29 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 16:51:40 +0100
Subject: [PATCH 45/53] Remove the dependency of MBEDTLS_AESNI_C on
 MBEDTLS_HAVE_ASM

AESNI can now be implemented with intrinsics.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 include/mbedtls/check_config.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
index 3065df5d94..394f1e8ea4 100644
--- a/include/mbedtls/check_config.h
+++ b/include/mbedtls/check_config.h
@@ -66,10 +66,6 @@
 #error "MBEDTLS_HAVE_TIME_DATE without MBEDTLS_HAVE_TIME does not make sense"
 #endif
 
-#if defined(MBEDTLS_AESNI_C) && !defined(MBEDTLS_HAVE_ASM)
-#error "MBEDTLS_AESNI_C defined, but not all prerequisites"
-#endif
-
 #if defined(MBEDTLS_AESCE_C) && !defined(MBEDTLS_HAVE_ASM)
 #error "MBEDTLS_AESCE_C defined, but not all prerequisites"
 #endif

From 0de8f853f04217ba982d08bb6393bb529eeb596a Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 17:14:59 +0100
Subject: [PATCH 46/53] Clean up AES context alignment code

Use a single auxiliary function to determine rk_offset, covering both
setkey_enc and setkey_dec, covering both AESNI and PADLOCK. For AESNI, only
build this when using the intrinsics-based implementation, since the
assembly implementation supports unaligned access.

Simplify "do we need to realign?" to "is the desired offset now equal to
the current offset?".

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c | 107 ++++++++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 51 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index b4b34232d1..91b6d431e4 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -504,6 +504,53 @@ void mbedtls_aes_xts_free(mbedtls_aes_xts_context *ctx)
 }
 #endif /* MBEDTLS_CIPHER_MODE_XTS */
 
+/* Some implementations need the round keys to be aligned.
+ * Return an offset to be added to buf, such that (buf + offset) is
+ * correctly aligned.
+ * Note that the offset is in units of elements of buf, i.e. 32-bit words,
+ * i.e. an offset of 1 means 4 bytes and so on.
+ */
+#if (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) ||        \
+    defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#define MAY_NEED_TO_ALIGN
+#endif
+static unsigned mbedtls_aes_rk_offset(uint32_t *buf)
+{
+#if defined(MAY_NEED_TO_ALIGN)
+    int align_16_bytes = 0;
+
+#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
+    if (aes_padlock_ace == -1) {
+        aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
+    }
+    if (aes_padlock_ace) {
+        align_16_bytes = 1;
+    }
+#endif
+
+#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        align_16_bytes = 1;
+    }
+#endif
+
+    if (align_16_bytes) {
+        /* These implementations needs 16-byte alignment
+         * for the round key array. */
+        unsigned delta = ((uintptr_t) buf & 0x0000000fU) / 4;
+        if (delta == 0) {
+            return 0;
+        } else {
+            return 4 - delta; // 16 bytes = 4 uint32_t
+        }
+    }
+#else /* MAY_NEED_TO_ALIGN */
+    (void) buf;
+#endif /* MAY_NEED_TO_ALIGN */
+
+    return 0;
+}
+
 /*
  * AES key schedule (encryption)
  */
@@ -528,27 +575,11 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
     }
 #endif
 
-    ctx->rk_offset = 0;
-#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
-    if (aes_padlock_ace == -1) {
-        aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
-    }
-
-    if (aes_padlock_ace) {
-        ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf;
-    }
-#endif
+    ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf);
     RK = ctx->buf + ctx->rk_offset;
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
-        /* The intrinsics-based implementation needs 16-byte alignment
-         * for the round key array. */
-        unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
-        if (delta != 0) {
-            ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
-        }
-        RK = ctx->buf + ctx->rk_offset;
         return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits);
     }
 #endif
@@ -640,26 +671,7 @@ int mbedtls_aes_setkey_dec(mbedtls_aes_context *ctx, const unsigned char *key,
 
     mbedtls_aes_init(&cty);
 
-    ctx->rk_offset = 0;
-#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
-    if (aes_padlock_ace == -1) {
-        aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
-    }
-
-    if (aes_padlock_ace) {
-        ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf;
-    }
-#endif
-#if defined(MBEDTLS_AESNI_HAVE_CODE)
-    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
-        /* The intrinsics-based implementation needs 16-byte alignment
-         * for the round key array. */
-        unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
-        if (delta != 0) {
-            ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
-        }
-    }
-#endif
+    ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf);
     RK = ctx->buf + ctx->rk_offset;
 
     /* Also checks keybits */
@@ -961,8 +973,7 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx,
 }
 #endif /* !MBEDTLS_AES_DECRYPT_ALT */
 
-#if defined(MBEDTLS_AESNI_HAVE_CODE) || \
-    (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86))
+#if defined(MAY_NEED_TO_ALIGN)
 /* VIA Padlock and our intrinsics-based implementation of AESNI require
  * the round keys to be aligned on a 16-byte boundary. We take care of this
  * before creating them, but the AES context may have moved (this can happen
@@ -972,16 +983,8 @@ int mbedtls_internal_aes_decrypt(mbedtls_aes_context *ctx,
  */
 static void aes_maybe_realign(mbedtls_aes_context *ctx)
 {
-    /* We want a 16-byte alignment. Note that buf is a pointer to uint32_t
-     * and rk_offset is in units of uint32_t words = 4 bytes. We want a
-     * 4-word alignment. */
-    uintptr_t current_address = (uintptr_t) (ctx->buf + ctx->rk_offset);
-    unsigned current_alignment = (current_address & 0x0000000f) / 4;
-    if (current_alignment != 0) {
-        unsigned new_offset = ctx->rk_offset + 4 - current_alignment;
-        if (new_offset >= 4) {
-            new_offset -= 4;
-        }
+    unsigned new_offset = mbedtls_aes_rk_offset(ctx->buf);
+    if (new_offset != ctx->rk_offset) {
         memmove(ctx->buf + new_offset,     // new address
                 ctx->buf + ctx->rk_offset, // current address
                 (ctx->nr + 1) * 16);       // number of round keys * bytes per rk
@@ -1002,9 +1005,12 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
         return MBEDTLS_ERR_AES_BAD_INPUT_DATA;
     }
 
+#if defined(MAY_NEED_TO_ALIGN)
+    aes_maybe_realign(ctx);
+#endif
+
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
-        aes_maybe_realign(ctx);
         return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
     }
 #endif
@@ -1017,7 +1023,6 @@ int mbedtls_aes_crypt_ecb(mbedtls_aes_context *ctx,
 
 #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace > 0) {
-        aes_maybe_realign(ctx);
         return mbedtls_padlock_xcryptecb(ctx, mode, input, output);
     }
 #endif

From 9c682e724a35cda220ee26f91f62bd8bebc24654 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 17:21:33 +0100
Subject: [PATCH 47/53] AESNI: Overhaul implementation selection

Have clearly separated code to:
* determine whether the assembly-based implementation is available;
* determine whether the intrinsics-based implementation is available;
* select one of the available implementations if any.

Now MBEDTLS_AESNI_HAVE_CODE can be the single interface for aes.c and
aesni.c to determine which AESNI is built.

Change the implementation selection: now, if both implementations are
available, always prefer assembly. Before, the intrinsics were used if
available. This preference is to minimize disruption, and will likely
be revised in a later minor release.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aes.c   |  4 ++--
 library/aesni.c | 18 +++++++++---------
 library/aesni.h | 35 ++++++++++++++++++++++++++---------
 3 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/library/aes.c b/library/aes.c
index 91b6d431e4..69da5828ac 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -511,7 +511,7 @@ void mbedtls_aes_xts_free(mbedtls_aes_xts_context *ctx)
  * i.e. an offset of 1 means 4 bytes and so on.
  */
 #if (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) ||        \
-    defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+    (defined(MBEDTLS_AESNI_C) && MBEDTLS_AESNI_HAVE_CODE == 2)
 #define MAY_NEED_TO_ALIGN
 #endif
 static unsigned mbedtls_aes_rk_offset(uint32_t *buf)
@@ -528,7 +528,7 @@ static unsigned mbedtls_aes_rk_offset(uint32_t *buf)
     }
 #endif
 
-#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#if defined(MBEDTLS_AESNI_C) && MBEDTLS_AESNI_HAVE_CODE == 2
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
         align_16_bytes = 1;
     }
diff --git a/library/aesni.c b/library/aesni.c
index e24527c826..a23c5b595b 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -30,9 +30,9 @@
 
 #include <string.h>
 
-#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS) || defined(MBEDTLS_HAVE_X86_64)
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
 
-#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#if MBEDTLS_AESNI_HAVE_CODE == 2
 #if !defined(_WIN32)
 #include <cpuid.h>
 #endif
@@ -48,7 +48,7 @@ int mbedtls_aesni_has_support(unsigned int what)
     static unsigned int c = 0;
 
     if (!done) {
-#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#if MBEDTLS_AESNI_HAVE_CODE == 2
         static unsigned info[4] = { 0, 0, 0, 0 };
 #if defined(_MSC_VER)
         __cpuid(info, 1);
@@ -56,20 +56,20 @@ int mbedtls_aesni_has_support(unsigned int what)
         __cpuid(1, info[0], info[1], info[2], info[3]);
 #endif
         c = info[2];
-#else
+#else /* AESNI using asm */
         asm ("movl  $1, %%eax   \n\t"
              "cpuid             \n\t"
              : "=c" (c)
              :
              : "eax", "ebx", "edx");
-#endif
+#endif /* MBEDTLS_AESNI_HAVE_CODE */
         done = 1;
     }
 
     return (c & what) != 0;
 }
 
-#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#if MBEDTLS_AESNI_HAVE_CODE == 2
 
 /*
  * AES-NI AES-ECB block en(de)cryption
@@ -388,7 +388,7 @@ static void aesni_setkey_enc_256(unsigned char *rk_bytes,
     aesni_set_rk_256(rk[12], rk[13], _mm_aeskeygenassist_si128(rk[13], 0x40), &rk[14], &rk[15]);
 }
 
-#else  /* MBEDTLS_HAVE_AESNI_INTRINSICS */
+#else /* MBEDTLS_AESNI_HAVE_CODE == 1 */
 
 #if defined(__has_feature)
 #if __has_feature(memory_sanitizer)
@@ -776,7 +776,7 @@ static void aesni_setkey_enc_256(unsigned char *rk,
          : "memory", "cc", "0");
 }
 
-#endif  /* MBEDTLS_HAVE_AESNI_INTRINSICS */
+#endif  /* MBEDTLS_AESNI_HAVE_CODE */
 
 /*
  * Key expansion, wrapper
@@ -795,6 +795,6 @@ int mbedtls_aesni_setkey_enc(unsigned char *rk,
     return 0;
 }
 
-#endif /* MBEDTLS_HAVE_X86_64 */
+#endif /* MBEDTLS_AESNI_HAVE_CODE */
 
 #endif /* MBEDTLS_AESNI_C */
diff --git a/library/aesni.h b/library/aesni.h
index c1c4bdd8f8..a81a11725d 100644
--- a/library/aesni.h
+++ b/library/aesni.h
@@ -32,6 +32,9 @@
 #define MBEDTLS_AESNI_AES      0x02000000u
 #define MBEDTLS_AESNI_CLMUL    0x00000002u
 
+/* Can we do AESNI with inline assembly?
+ * (Only implemented with gas syntax, only for 64-bit.)
+ */
 #if defined(MBEDTLS_HAVE_ASM) && defined(__GNUC__) && \
     (defined(__amd64__) || defined(__x86_64__))   &&  \
     !defined(MBEDTLS_HAVE_X86_64)
@@ -40,19 +43,33 @@
 
 #if defined(MBEDTLS_AESNI_C)
 
-#if defined(MBEDTLS_HAVE_X86_64)
-#define MBEDTLS_AESNI_HAVE_CODE // via assembly
-#endif
-
+/* Can we do AESNI with intrinsics?
+ * (Only implemented with certain compilers, .)
+ */
+#undef MBEDTLS_AESNI_HAVE_INTRINSICS
 #if defined(_MSC_VER)
-#define MBEDTLS_HAVE_AESNI_INTRINSICS
+/* Visual Studio supports AESNI intrinsics since VS 2008 SP1. We only support
+ * VS 2013 and up for other reasons anyway, so no need to check the version. */
+#define MBEDTLS_AESNI_HAVE_INTRINSICS
 #endif
-#if defined(__GNUC__) && defined(__AES__)
-#define MBEDTLS_HAVE_AESNI_INTRINSICS
+/* GCC-like compilers: currently, we only support intrinsics if the requisite
+ * target flag is enabled when building the library (e.g. `gcc -mpclmul -msse2`
+ * or `clang -maes -mpclmul`). */
+#if defined(__GNUC__) && defined(__AES__) && defined(__PCLMUL__)
+#define MBEDTLS_AESNI_HAVE_INTRINSICS
 #endif
 
-#if defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
-#define MBEDTLS_AESNI_HAVE_CODE // via intrinsics
+/* Choose the implementation of AESNI, if one is available. */
+#undef MBEDTLS_AESNI_HAVE_CODE
+/* To minimize disruption when releasing the intrinsics-based implementation,
+ * favor the assembly-based implementation if it's available. We intend to
+ * revise this in a later release of Mbed TLS 3.x. In the long run, we will
+ * likely remove the assembly implementation. */
+#if defined(MBEDTLS_HAVE_X86_64)
+#define MBEDTLS_AESNI_HAVE_CODE 1 // via assembly
+#endif
+#if defined(MBEDTLS_AESNI_HAVE_INTRINSICS)
+#define MBEDTLS_AESNI_HAVE_CODE 2 // via intrinsics
 #endif
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)

From 0bfccfa5379f79f48befeee2641be739b7d8c435 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 17:49:44 +0100
Subject: [PATCH 48/53] Document the new state of AESNI support

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 include/mbedtls/mbedtls_config.h | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 7daba37400..62e12152b6 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -55,7 +55,7 @@
  *      library/padlock.h
  *
  * Required by:
- *      MBEDTLS_AESNI_C
+ *      MBEDTLS_AESNI_C (on some platforms)
  *      MBEDTLS_PADLOCK_C
  *
  * Comment to disable the use of assembly code.
@@ -2018,14 +2018,32 @@
 /**
  * \def MBEDTLS_AESNI_C
  *
- * Enable AES-NI support on x86-64.
+ * Enable AES-NI support on x86-64 or x86-32.
+ *
+ * \note AESNI is only supported with certain compilers and target options:
+ * - Visual Studio 2013: supported.
+ * - GCC, x86-64, target not explicitly supporting AESNI:
+ *   requires MBEDTLS_HAVE_ASM.
+ * - GCC, x86-32, target not explicitly supporting AESNI:
+ *   not supported.
+ * - GCC, x86-64 or x86-32, target supporting AESNI: supported.
+ *   For this assembly-less implementation, you must currently compile
+ *   `library/aesni.c` and `library/aes.c` with machine options to enable
+ *   SSE2 and AESNI instructions: `gcc -msse2 -maes -mpclmul` or
+ *   `clang -maes -mpclmul`.
+ * - Non-x86 targets: this option is silently ignored.
+ * - Other compilers: this option is silently ignored.
+ *
+ * \note
+ * Above, "GCC" includes compatible compilers such as Clang.
+ * The limitations on target support are likely to be relaxed in the future.
  *
  * Module:  library/aesni.c
  * Caller:  library/aes.c
  *
- * Requires: MBEDTLS_HAVE_ASM
+ * Requires: MBEDTLS_HAVE_ASM (on some platforms, see note)
  *
- * This modules adds support for the AES-NI instructions on x86-64
+ * This modules adds support for the AES-NI instructions on x86.
  */
 #define MBEDTLS_AESNI_C
 

From 74b4223c81ac9256d531d5b688a7d5e8de5040b4 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 17:50:15 +0100
Subject: [PATCH 49/53] Announce the expanded AESNI support

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 ChangeLog.d/aesni.txt | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 ChangeLog.d/aesni.txt

diff --git a/ChangeLog.d/aesni.txt b/ChangeLog.d/aesni.txt
new file mode 100644
index 0000000000..2d90a6e1cc
--- /dev/null
+++ b/ChangeLog.d/aesni.txt
@@ -0,0 +1,7 @@
+Features
+   * AES-NI is now supported with Visual Studio.
+   * AES-NI is now supported in 32-bit builds, or when MBEDTLS_HAVE_ASM
+     is disabled, when compiling with GCC or Clang or a compatible compiler
+     for a target CPU that supports the requisite instructions (for example
+     gcc -m32 -msse2 -maes -mpclmul). (Generic x86 builds with GCC-like
+     compilers still require MBEDTLS_HAVE_ASM and a 64-bit target.)

From 646ee7ec2ee56b0b0b4de3e7c198fcb61d7e53bd Mon Sep 17 00:00:00 2001
From: Paul Elliott <paul.elliott@arm.com>
Date: Thu, 16 Mar 2023 17:10:34 +0000
Subject: [PATCH 50/53] Fix CI build after repo merge conflict

After merging the driver only ECDSA work, a conflict arose between that and
the previous work re-ordering the ciphersuite preference list. We can remove
the breaking requirement on this test, as this requirement is now auto-detected
when the server5 crt is used in the server's command line.

Signed-off-by: Paul Elliott <paul.elliott@arm.com>
---
 tests/ssl-opt.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/ssl-opt.sh b/tests/ssl-opt.sh
index 7c165f3108..4fb39f31ef 100755
--- a/tests/ssl-opt.sh
+++ b/tests/ssl-opt.sh
@@ -11478,7 +11478,6 @@ requires_config_enabled MBEDTLS_SSL_TLS1_3_KEY_EXCHANGE_MODE_EPHEMERAL_ENABLED
 requires_ciphersuite_enabled TLS1-3-CHACHA20-POLY1305-SHA256
 requires_config_enabled MBEDTLS_ECP_DP_CURVE25519_ENABLED
 requires_config_enabled MBEDTLS_ECP_DP_SECP256R1_ENABLED
-requires_config_enabled MBEDTLS_ECDSA_C
 run_test    "TLS 1.3: Default" \
             "$P_SRV allow_sha1=0 debug_level=3 crt_file=data_files/server5.crt key_file=data_files/server5.key force_version=tls13" \
             "$P_CLI allow_sha1=0" \

From 28e4dc1e398f57230bba45f0f9451f24bf3b4016 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Thu, 16 Mar 2023 21:39:47 +0100
Subject: [PATCH 51/53] Fix use of arithmetic on void*

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 tests/suites/test_suite_aes.function | 107 ++++++++++++++-------------
 1 file changed, 55 insertions(+), 52 deletions(-)

diff --git a/tests/suites/test_suite_aes.function b/tests/suites/test_suite_aes.function
index a535086bb6..363a5fd27c 100644
--- a/tests/suites/test_suite_aes.function
+++ b/tests/suites/test_suite_aes.function
@@ -529,81 +529,84 @@ void aes_ecb_copy_context(data_t *key)
     /* We test context copying multiple times, with different alignments
      * of the original and of the copies. */
 
-    void *src = NULL; // Memory block containing the original context
-    void *enc = NULL; // Memory block containing the copy doing encryption
-    void *dec = NULL; // Memory block containing the copy doing decryption
+    struct align0 {
+        mbedtls_aes_context ctx;
+    };
+    struct align0 *src0 = NULL;
+    struct align0 *enc0 = NULL;
+    struct align0 *dec0 = NULL;
 
     struct align1 {
         char bump;
         mbedtls_aes_context ctx;
     };
+    struct align1 *src1 = NULL;
+    struct align1 *enc1 = NULL;
+    struct align1 *dec1 = NULL;
 
     /* All peak alignment */
-    ASSERT_ALLOC(src, sizeof(mbedtls_aes_context));
-    ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context));
-    ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context));
-    if (!test_copy(key, src, enc, dec)) {
+    ASSERT_ALLOC(src0, 1);
+    ASSERT_ALLOC(enc0, 1);
+    ASSERT_ALLOC(dec0, 1);
+    if (!test_copy(key, &src0->ctx, &enc0->ctx, &dec0->ctx)) {
         goto exit;
     }
-    mbedtls_free(src);
-    src = NULL;
-    mbedtls_free(enc);
-    enc = NULL;
-    mbedtls_free(dec);
-    dec = NULL;
+    mbedtls_free(src0);
+    src0 = NULL;
+    mbedtls_free(enc0);
+    enc0 = NULL;
+    mbedtls_free(dec0);
+    dec0 = NULL;
 
     /* Original shifted */
-    ASSERT_ALLOC(src, sizeof(struct align1));
-    ASSERT_ALLOC(enc, sizeof(mbedtls_aes_context));
-    ASSERT_ALLOC(dec, sizeof(mbedtls_aes_context));
-    if (!test_copy(key, &((struct align1 *) src)->ctx, enc, dec)) {
+    ASSERT_ALLOC(src1, 1);
+    ASSERT_ALLOC(enc0, 1);
+    ASSERT_ALLOC(dec0, 1);
+    if (!test_copy(key, &src1->ctx, &enc0->ctx, &dec0->ctx)) {
         goto exit;
     }
-    mbedtls_free(src);
-    src = NULL;
-    mbedtls_free(enc);
-    enc = NULL;
-    mbedtls_free(dec);
-    dec = NULL;
+    mbedtls_free(src1);
+    src1 = NULL;
+    mbedtls_free(enc0);
+    enc0 = NULL;
+    mbedtls_free(dec0);
+    dec0 = NULL;
 
     /* Copies shifted */
-    ASSERT_ALLOC(src, sizeof(mbedtls_aes_context));
-    ASSERT_ALLOC(enc, sizeof(struct align1));
-    ASSERT_ALLOC(dec, sizeof(struct align1));
-    if (!test_copy(key,
-                   src,
-                   &((struct align1 *) enc)->ctx,
-                   &((struct align1 *) dec)->ctx)) {
+    ASSERT_ALLOC(src0, 1);
+    ASSERT_ALLOC(enc1, 1);
+    ASSERT_ALLOC(dec1, 1);
+    if (!test_copy(key, &src0->ctx, &enc1->ctx, &dec1->ctx)) {
         goto exit;
     }
-    mbedtls_free(src);
-    src = NULL;
-    mbedtls_free(enc);
-    enc = NULL;
-    mbedtls_free(dec);
-    dec = NULL;
+    mbedtls_free(src0);
+    src0 = NULL;
+    mbedtls_free(enc1);
+    enc1 = NULL;
+    mbedtls_free(dec1);
+    dec1 = NULL;
 
     /* Source and copies shifted */
-    ASSERT_ALLOC(src, sizeof(struct align1));
-    ASSERT_ALLOC(enc, sizeof(struct align1));
-    ASSERT_ALLOC(dec, sizeof(struct align1));
-    if (!test_copy(key,
-                   &((struct align1 *) src)->ctx,
-                   &((struct align1 *) enc)->ctx,
-                   &((struct align1 *) dec)->ctx)) {
+    ASSERT_ALLOC(src1, 1);
+    ASSERT_ALLOC(enc1, 1);
+    ASSERT_ALLOC(dec1, 1);
+    if (!test_copy(key, &src1->ctx, &enc1->ctx, &dec1->ctx)) {
         goto exit;
     }
-    mbedtls_free(src);
-    src = NULL;
-    mbedtls_free(enc);
-    enc = NULL;
-    mbedtls_free(dec);
-    dec = NULL;
+    mbedtls_free(src1);
+    src1 = NULL;
+    mbedtls_free(enc1);
+    enc1 = NULL;
+    mbedtls_free(dec1);
+    dec1 = NULL;
 
 exit:
-    mbedtls_free(src);
-    mbedtls_free(enc);
-    mbedtls_free(dec);
+    mbedtls_free(src0);
+    mbedtls_free(enc0);
+    mbedtls_free(dec0);
+    mbedtls_free(src1);
+    mbedtls_free(enc1);
+    mbedtls_free(dec1);
 }
 /* END_CASE */
 

From 30e9f2a293287304f3a314c8cce6725466269e4e Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 17 Mar 2023 17:29:58 +0100
Subject: [PATCH 52/53] Finish sentence in comment

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/aesni.h b/library/aesni.h
index a81a11725d..2493998c67 100644
--- a/library/aesni.h
+++ b/library/aesni.h
@@ -44,7 +44,7 @@
 #if defined(MBEDTLS_AESNI_C)
 
 /* Can we do AESNI with intrinsics?
- * (Only implemented with certain compilers, .)
+ * (Only implemented with certain compilers, only for certain targets.)
  */
 #undef MBEDTLS_AESNI_HAVE_INTRINSICS
 #if defined(_MSC_VER)

From 36b9e47eed1fdf4be05d516b1311af4bc0566169 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Fri, 17 Mar 2023 17:30:29 +0100
Subject: [PATCH 53/53] Fix preprocessor conditional

This was intended as an if-else-if chain. Make it so.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 library/aesni.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/library/aesni.h b/library/aesni.h
index 2493998c67..51b770f316 100644
--- a/library/aesni.h
+++ b/library/aesni.h
@@ -67,8 +67,7 @@
  * likely remove the assembly implementation. */
 #if defined(MBEDTLS_HAVE_X86_64)
 #define MBEDTLS_AESNI_HAVE_CODE 1 // via assembly
-#endif
-#if defined(MBEDTLS_AESNI_HAVE_INTRINSICS)
+#elif defined(MBEDTLS_AESNI_HAVE_INTRINSICS)
 #define MBEDTLS_AESNI_HAVE_CODE 2 // via intrinsics
 #endif