From 42240423953ada23aef71bddccbf0b78bc77d0a5 Mon Sep 17 00:00:00 2001 From: Xu Chun Guang Date: Thu, 8 May 2025 12:10:15 +0800 Subject: [PATCH] feat: Perform an exponentiation by the algorithm of mbedtls V2 which is faster --- components/mbedtls/mbedtls_v3/CMakeLists.txt | 4 + components/mbedtls/mbedtls_v3/Kconfig | 7 + components/mbedtls/mbedtls_v3/component.mk | 2 +- .../mbedtls/mbedtls_v3/port/esp_bignum.c | 1000 +++++++---------- .../port/include/mbedtls/esp_config.h | 8 +- 5 files changed, 411 insertions(+), 610 deletions(-) diff --git a/components/mbedtls/mbedtls_v3/CMakeLists.txt b/components/mbedtls/mbedtls_v3/CMakeLists.txt index dbfc6dc2..56ce65f5 100644 --- a/components/mbedtls/mbedtls_v3/CMakeLists.txt +++ b/components/mbedtls/mbedtls_v3/CMakeLists.txt @@ -196,6 +196,10 @@ target_sources(mbedcrypto PRIVATE "${current_dir}/port/esp_mem.c" "${current_dir}/port/esp_timing.c" "${current_dir}/port/esp_hardware.c") +if(CONFIG_MBEDTLS_MPI_EXP_MOD_ALT) + target_sources(mbedcrypto PRIVATE "${current_dir}/port/esp_bignum.c") +endif() + if(CONFIG_SOC_AES_SUPPORTED) target_sources(mbedcrypto PRIVATE "${current_dir}/port/aes/esp_aes_xts.c" "${current_dir}/port/aes/esp_aes_common.c" diff --git a/components/mbedtls/mbedtls_v3/Kconfig b/components/mbedtls/mbedtls_v3/Kconfig index fffa5f82..dfb873eb 100644 --- a/components/mbedtls/mbedtls_v3/Kconfig +++ b/components/mbedtls/mbedtls_v3/Kconfig @@ -639,6 +639,13 @@ config MBEDTLS_SHA3_C Enabling this configuration option increases the flash footprint by almost 4KB. +config MBEDTLS_MPI_EXP_MOD_ALT + bool "Enable the alternative exponentiation" + default y + help + Perform an exponentiation by the algorithm of mbedtls V2, + which is faster when calling "mbedtls_mpi_exp_mod()". + choice MBEDTLS_TLS_MODE bool "TLS Protocol Role" default MBEDTLS_TLS_SERVER_AND_CLIENT diff --git a/components/mbedtls/mbedtls_v3/component.mk b/components/mbedtls/mbedtls_v3/component.mk index 66da38fb..227e1bcf 100644 --- a/components/mbedtls/mbedtls_v3/component.mk +++ b/components/mbedtls/mbedtls_v3/component.mk @@ -10,7 +10,7 @@ COMPONENT_SRCDIRS := $(CURRENT_DIR)/mbedtls/library $(CURRENT_DIR)/port COMPONENT_OBJEXCLUDE := $(CURRENT_DIR)/mbedtls/library/net_sockets.o -ifndef CONFIG_MBEDTLS_HARDWARE_MPI +ifndef CONFIG_MBEDTLS_MPI_EXP_MOD_ALT COMPONENT_OBJEXCLUDE += $(CURRENT_DIR)/port/esp_bignum.o endif diff --git a/components/mbedtls/mbedtls_v3/port/esp_bignum.c b/components/mbedtls/mbedtls_v3/port/esp_bignum.c index e9596bec..0fd608a2 100644 --- a/components/mbedtls/mbedtls_v3/port/esp_bignum.c +++ b/components/mbedtls/mbedtls_v3/port/esp_bignum.c @@ -1,454 +1,245 @@ /* - * Multi-precision integer library - * ESP-IDF hardware accelerated parts based on mbedTLS implementation + * Multi-precision integer library * - * SPDX-FileCopyrightText: The Mbed TLS Contributors - * - * SPDX-License-Identifier: Apache-2.0 - * - * SPDX-FileContributor: 2016-2023 Espressif Systems (Shanghai) CO LTD + * Copyright The Mbed TLS Contributors + * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */ -#include -#include -#include + +/* + * The following sources were referenced in the design of this Multi-precision + * Integer library: + * + * [1] Handbook of Applied Cryptography - 1997 + * Menezes, van Oorschot and Vanstone + * + * [2] Multi-Precision Math + * Tom St Denis + * https://github.com/libtom/libtommath/blob/develop/tommath.pdf + * + * [3] GNU Multi-Precision Arithmetic Library + * https://gmplib.org/manual/index.html + * + */ + +#include "common.h" + +#if defined(MBEDTLS_BIGNUM_C) + +#include "mbedtls/bignum.h" +#include "bignum_core.h" +#include "bignum_internal.h" +#include "bn_mul.h" +#include "mbedtls/platform_util.h" +#include "mbedtls/error.h" +#include "constant_time_internal.h" + #include -#include -#include -#include +#include -#include "esp_system.h" -#include "esp_log.h" -#include "esp_attr.h" -#include "esp_intr_alloc.h" -#if CONFIG_PM_ENABLE -#include "esp_pm.h" -#endif +#include "mbedtls/platform.h" -#include "freertos/FreeRTOS.h" -#include "freertos/semphr.h" +#if defined(MBEDTLS_MPI_EXP_MOD_ALT) +#define MBEDTLS_INTERNAL_VALIDATE_RET( cond, ret ) \ + do { \ + if( !(cond) ) \ + { \ + return( ret ); \ + } \ + } while( 0 ) -#include "soc/hwcrypto_periph.h" -#include "soc/periph_defs.h" -#include "soc/soc_caps.h" +/* Internal macro meant to be called only from within the library. */ +#define MBEDTLS_INTERNAL_VALIDATE( cond ) \ + do { \ + if( !(cond) ) \ + { \ + return; \ + } \ + } while( 0 ) -#include "bignum_impl.h" +#define MPI_VALIDATE_RET( cond ) \ + MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_MPI_BAD_INPUT_DATA ) +#define MPI_VALIDATE( cond ) \ + MBEDTLS_INTERNAL_VALIDATE( cond ) -#include +#define MULADDC_INIT \ +{ \ + mbedtls_mpi_uint s0, s1, b0, b1; \ + mbedtls_mpi_uint r0, r1, rx, ry; \ + b0 = ( b << biH ) >> biH; \ + b1 = ( b >> biH ); +#define MULADDC_CORE \ + s0 = ( *s << biH ) >> biH; \ + s1 = ( *s >> biH ); s++; \ + rx = s0 * b1; r0 = s0 * b0; \ + ry = s1 * b0; r1 = s1 * b1; \ + r1 += ( rx >> biH ); \ + r1 += ( ry >> biH ); \ + rx <<= biH; ry <<= biH; \ + r0 += rx; r1 += (r0 < rx); \ + r0 += ry; r1 += (r0 < ry); \ + r0 += c; r1 += (r0 < c); \ + r0 += *d; r1 += (r0 < *d); \ + c = r1; *(d++) = r0; -/* Some implementation notes: - * - * - Naming convention x_words, y_words, z_words for number of words (limbs) used in a particular - * bignum. This number may be less than the size of the bignum - * - * - Naming convention hw_words for the hardware length of the operation. This number maybe be rounded up - * for targets that requres this (e.g. ESP32), and may be larger than any of the numbers - * involved in the calculation. - * - * - Timing behaviour of these functions will depend on the length of the inputs. This is fundamentally - * the same constraint as the software mbedTLS implementations, and relies on the same - * countermeasures (exponent blinding, etc) which are used in mbedTLS. - */ - -static const __attribute__((unused)) char *TAG = "bignum"; - -#define ciL (sizeof(mbedtls_mpi_uint)) /* chars in limb */ -#define biL (ciL << 3) /* bits in limb */ - -#if defined(CONFIG_MBEDTLS_MPI_USE_INTERRUPT) -static SemaphoreHandle_t op_complete_sem; -#if defined(CONFIG_PM_ENABLE) -static esp_pm_lock_handle_t s_pm_cpu_lock; -static esp_pm_lock_handle_t s_pm_sleep_lock; -#endif - -static IRAM_ATTR void esp_mpi_complete_isr(void *arg) -{ - BaseType_t higher_woken; - esp_mpi_interrupt_clear(); - - xSemaphoreGiveFromISR(op_complete_sem, &higher_woken); - if (higher_woken) { - portYIELD_FROM_ISR(); - } +#define MULADDC_STOP \ } - -static esp_err_t esp_mpi_isr_initialise(void) +static void mpi_sub_hlp( size_t n, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d ) { - esp_mpi_interrupt_clear(); - esp_mpi_interrupt_enable(true); - if (op_complete_sem == NULL) { - static StaticSemaphore_t op_sem_buf; - op_complete_sem = xSemaphoreCreateBinaryStatic(&op_sem_buf); - if (op_complete_sem == NULL) { - ESP_LOGE(TAG, "Failed to create intr semaphore"); - return ESP_FAIL; - } + size_t i; + mbedtls_mpi_uint c, z; - esp_err_t ret; - ret = esp_intr_alloc(ETS_RSA_INTR_SOURCE, 0, esp_mpi_complete_isr, NULL, NULL); - if (ret != ESP_OK) { - ESP_LOGE(TAG, "Failed to allocate RSA interrupt %d", ret); - - // This should be treated as fatal error as this API would mostly - // be invoked within mbedTLS interface. There is no way for the system - // to proceed if the MPI interrupt allocation fails here. - abort(); - } - } - - /* MPI is clocked proportionally to CPU clock, take power management lock */ -#ifdef CONFIG_PM_ENABLE - if (s_pm_cpu_lock == NULL) { - if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "mpi_sleep", &s_pm_sleep_lock) != ESP_OK) { - ESP_LOGE(TAG, "Failed to create PM sleep lock"); - return ESP_FAIL; - } - if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "mpi_cpu", &s_pm_cpu_lock) != ESP_OK) { - ESP_LOGE(TAG, "Failed to create PM CPU lock"); - return ESP_FAIL; - } - } - esp_pm_lock_acquire(s_pm_cpu_lock); - esp_pm_lock_acquire(s_pm_sleep_lock); -#endif - - return ESP_OK; -} - -static int esp_mpi_wait_intr(void) -{ - if (!xSemaphoreTake(op_complete_sem, 2000 / portTICK_PERIOD_MS)) { - ESP_LOGE("MPI", "Timed out waiting for completion of MPI Interrupt"); - return -1; - } - -#ifdef CONFIG_PM_ENABLE - esp_pm_lock_release(s_pm_cpu_lock); - esp_pm_lock_release(s_pm_sleep_lock); -#endif // CONFIG_PM_ENABLE - - esp_mpi_interrupt_enable(false); - - return 0; -} - -#endif // CONFIG_MBEDTLS_MPI_USE_INTERRUPT - -/* Convert bit count to word count - */ -static inline size_t bits_to_words(size_t bits) -{ - return (bits + 31) / 32; -} - -/* Return the number of words actually used to represent an mpi - number. -*/ -#if defined(MBEDTLS_MPI_EXP_MOD_ALT) || defined(MBEDTLS_MPI_EXP_MOD_ALT_FALLBACK) -static size_t mpi_words(const mbedtls_mpi *mpi) -{ - for (size_t i = mpi->MBEDTLS_PRIVATE(n); i > 0; i--) { - if (mpi->MBEDTLS_PRIVATE(p[i - 1]) != 0) { - return i; - } - } - return 0; -} - -#endif //(MBEDTLS_MPI_EXP_MOD_ALT || MBEDTLS_MPI_EXP_MOD_ALT_FALLBACK) - -/** - * - * There is a need for the value of integer N' such that B^-1(B-1)-N^-1N'=1, - * where B^-1(B-1) mod N=1. Actually, only the least significant part of - * N' is needed, hence the definition N0'=N' mod b. We reproduce below the - * simple algorithm from an article by Dusse and Kaliski to efficiently - * find N0' from N0 and b - */ -static mbedtls_mpi_uint modular_inverse(const mbedtls_mpi *M) -{ - int i; - uint64_t t = 1; - uint64_t two_2_i_minus_1 = 2; /* 2^(i-1) */ - uint64_t two_2_i = 4; /* 2^i */ - uint64_t N = M->MBEDTLS_PRIVATE(p[0]); - - for (i = 2; i <= 32; i++) { - if ((mbedtls_mpi_uint) N * t % two_2_i >= two_2_i_minus_1) { - t += two_2_i_minus_1; - } - - two_2_i_minus_1 <<= 1; - two_2_i <<= 1; - } - - return (mbedtls_mpi_uint)(UINT32_MAX - t + 1); -} - -/* Calculate Rinv = RR^2 mod M, where: - * - * R = b^n where b = 2^32, n=num_words, - * R = 2^N (where N=num_bits) - * RR = R^2 = 2^(2*N) (where N=num_bits=num_words*32) - * - * This calculation is computationally expensive (mbedtls_mpi_mod_mpi) - * so caller should cache the result where possible. - * - * DO NOT call this function while holding esp_mpi_enable_hardware_hw_op(). - * - */ -static int calculate_rinv(mbedtls_mpi *Rinv, const mbedtls_mpi *M, int num_words) -{ - int ret; - size_t num_bits = num_words * 32; - mbedtls_mpi RR; - mbedtls_mpi_init(&RR); - MBEDTLS_MPI_CHK(mbedtls_mpi_set_bit(&RR, num_bits * 2, 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(Rinv, &RR, M)); - -cleanup: - mbedtls_mpi_free(&RR); - - return ret; -} - - - - - - -/* Z = (X * Y) mod M - - Not an mbedTLS function -*/ -int esp_mpi_mul_mpi_mod(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M) -{ - int ret = 0; - - size_t x_bits = mbedtls_mpi_bitlen(X); - size_t y_bits = mbedtls_mpi_bitlen(Y); - size_t m_bits = mbedtls_mpi_bitlen(M); - size_t z_bits = MIN(m_bits, x_bits + y_bits); - size_t x_words = bits_to_words(x_bits); - size_t y_words = bits_to_words(y_bits); - size_t m_words = bits_to_words(m_bits); - size_t z_words = bits_to_words(z_bits); - size_t hw_words = esp_mpi_hardware_words(MAX(x_words, MAX(y_words, m_words))); /* longest operand */ - mbedtls_mpi Rinv; - mbedtls_mpi_uint Mprime; - - /* Calculate and load the first stage montgomery multiplication */ - mbedtls_mpi_init(&Rinv); - MBEDTLS_MPI_CHK(calculate_rinv(&Rinv, M, hw_words)); - Mprime = modular_inverse(M); - - esp_mpi_enable_hardware_hw_op(); - /* Load and start a (X * Y) mod M calculation */ - esp_mpi_mul_mpi_mod_hw_op(X, Y, M, &Rinv, Mprime, hw_words); - - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(Z, z_words)); - - esp_mpi_read_result_hw_op(Z, z_words); - Z->MBEDTLS_PRIVATE(s) = X->MBEDTLS_PRIVATE(s) * Y->MBEDTLS_PRIVATE(s); - -cleanup: - mbedtls_mpi_free(&Rinv); - esp_mpi_disable_hardware_hw_op(); - - return ret; -} - -#if defined(MBEDTLS_MPI_EXP_MOD_ALT) || defined(MBEDTLS_MPI_EXP_MOD_ALT_FALLBACK) - -#ifdef ESP_MPI_USE_MONT_EXP -/* - * Return the most significant one-bit. - */ -static size_t mbedtls_mpi_msb( const mbedtls_mpi *X ) -{ - int i, j; - if (X != NULL && X->MBEDTLS_PRIVATE(n) != 0) { - for (i = X->MBEDTLS_PRIVATE(n) - 1; i >= 0; i--) { - if (X->MBEDTLS_PRIVATE(p[i]) != 0) { - for (j = biL - 1; j >= 0; j--) { - if ((X->MBEDTLS_PRIVATE(p[i]) & (1 << j)) != 0) { - return (i * biL) + j; - } - } - } - } - } - return 0; -} - -/* - * Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94) - */ -static int mpi_montgomery_exp_calc( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, - mbedtls_mpi *Rinv, - size_t hw_words, - mbedtls_mpi_uint Mprime ) -{ - int ret = 0; - mbedtls_mpi X_, one; - - mbedtls_mpi_init(&X_); - mbedtls_mpi_init(&one); - if ( ( ( ret = mbedtls_mpi_grow(&one, hw_words) ) != 0 ) || - ( ( ret = mbedtls_mpi_set_bit(&one, 0, 1) ) != 0 ) ) { - goto cleanup2; - } - - // Algorithm from HAC 14.94 + for( i = c = 0; i < n; i++, s++, d++ ) { - // 0 determine t (highest bit set in y) - int t = mbedtls_mpi_msb(Y); - - esp_mpi_enable_hardware_hw_op(); - - // 1.1 x_ = mont(x, R^2 mod m) - // = mont(x, rb) - MBEDTLS_MPI_CHK( esp_mont_hw_op(&X_, X, Rinv, M, Mprime, hw_words, false) ); - - // 1.2 z = R mod m - // now z = R mod m = Mont (R^2 mod m, 1) mod M (as Mont(x) = X&R^-1 mod M) - MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Rinv, &one, M, Mprime, hw_words, true) ); - - // 2 for i from t down to 0 - for (int i = t; i >= 0; i--) { - // 2.1 z = mont(z,z) - if (i != t) { // skip on the first iteration as is still unity - MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Z, Z, M, Mprime, hw_words, true) ); - } - - // 2.2 if y[i] = 1 then z = mont(A, x_) - if (mbedtls_mpi_get_bit(Y, i)) { - MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Z, &X_, M, Mprime, hw_words, true) ); - } - } - - // 3 z = Mont(z, 1) - MBEDTLS_MPI_CHK( esp_mont_hw_op(Z, Z, &one, M, Mprime, hw_words, true) ); + z = ( *d < c ); *d -= c; + c = ( *d < *s ) + z; *d -= *s; } -cleanup: - esp_mpi_disable_hardware_hw_op(); - -cleanup2: - mbedtls_mpi_free(&X_); - mbedtls_mpi_free(&one); - return ret; + while( c != 0 ) + { + z = ( *d < c ); *d -= c; + c = z; d++; + } } -#endif //USE_MONT_EXPONENATIATION + +void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b ); + +void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b ) +{ + mbedtls_mpi_uint c = 0, t = 0; + +#if defined(MULADDC_HUIT) + for( ; i >= 8; i -= 8 ) + { + MULADDC_INIT + MULADDC_HUIT + MULADDC_STOP + } + + for( ; i > 0; i-- ) + { + MULADDC_INIT + MULADDC_CORE + MULADDC_STOP + } +#else /* MULADDC_HUIT */ + for( ; i >= 16; i -= 16 ) + { + MULADDC_INIT + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + MULADDC_STOP + } + + for( ; i >= 8; i -= 8 ) + { + MULADDC_INIT + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + + MULADDC_CORE MULADDC_CORE + MULADDC_CORE MULADDC_CORE + MULADDC_STOP + } + + for( ; i > 0; i-- ) + { + MULADDC_INIT + MULADDC_CORE + MULADDC_STOP + } +#endif /* MULADDC_HUIT */ + + t++; + + do { + *d += c; c = ( *d < c ); d++; + } + while( c != 0 ); +} +/* + * Fast Montgomery initialization (thanks to Tom St Denis) + */ +static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N ) +{ + mbedtls_mpi_uint x, m0 = N->p[0]; + unsigned int i; + + x = m0; + x += ( ( m0 + 2 ) & 4 ) << 1; + + for( i = biL; i >= 8; i /= 2 ) + x *= ( 2 - ( m0 * x ) ); + + *mm = ~x + 1; +} /* - * Z = X ^ Y mod M - * - * _Rinv is optional pre-calculated version of Rinv (via calculate_rinv()). - * - * (See RSA Accelerator section in Technical Reference for more about Mprime, Rinv) - * + * Montgomery multiplication: A = A * B * R^-1 mod N (HAC 14.36) */ -static int esp_mpi_exp_mod( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, mbedtls_mpi *_Rinv ) +static int mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm, + const mbedtls_mpi *T ) { - int ret = 0; + size_t i, n, m; + mbedtls_mpi_uint u0, u1, *d; - mbedtls_mpi Rinv_new; /* used if _Rinv == NULL */ - mbedtls_mpi *Rinv; /* points to _Rinv (if not NULL) othwerwise &RR_new */ - mbedtls_mpi_uint Mprime; + if( T->n < N->n + 1 || T->p == NULL ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); - size_t x_words = mpi_words(X); - size_t y_words = mpi_words(Y); - size_t m_words = mpi_words(M); + memset( T->p, 0, T->n * ciL ); - /* "all numbers must be the same length", so choose longest number - as cardinal length of operation... - */ - size_t num_words = esp_mpi_hardware_words(MAX(m_words, MAX(x_words, y_words))); + d = T->p; + n = N->n; + m = ( B->n < n ) ? B->n : n; - if (num_words * 32 > SOC_RSA_MAX_BIT_LEN) { - return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; + for( i = 0; i < n; i++ ) + { + /* + * T = (T + u0*B + u1*N) / 2^biL + */ + u0 = A->p[i]; + u1 = ( d[0] + u0 * B->p[0] ) * mm; + + mpi_mul_hlp( m, B->p, d, u0 ); + mpi_mul_hlp( n, N->p, d, u1 ); + + *d++ = u0; d[n + 1] = 0; } - if (mbedtls_mpi_cmp_int(M, 0) <= 0 || (M->MBEDTLS_PRIVATE(p[0]) & 1) == 0) { - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - } + memcpy( A->p, d, ( n + 1 ) * ciL ); - if (mbedtls_mpi_cmp_int(Y, 0) < 0) { - return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; - } + if( mbedtls_mpi_cmp_abs( A, N ) >= 0 ) + mpi_sub_hlp( n, N->p, A->p ); + else + /* prevent timing attacks */ + mpi_sub_hlp( n, A->p, T->p ); - if (mbedtls_mpi_cmp_int(Y, 0) == 0) { - return mbedtls_mpi_lset(Z, 1); - } - - /* Determine RR pointer, either _RR for cached value - or local RR_new */ - if (_Rinv == NULL) { - mbedtls_mpi_init(&Rinv_new); - Rinv = &Rinv_new; - } else { - Rinv = _Rinv; - } - if (Rinv->MBEDTLS_PRIVATE(p) == NULL) { - MBEDTLS_MPI_CHK(calculate_rinv(Rinv, M, num_words)); - } - - Mprime = modular_inverse(M); - - // Montgomery exponentiation: Z = X ^ Y mod M (HAC 14.94) -#ifdef ESP_MPI_USE_MONT_EXP - ret = mpi_montgomery_exp_calc(Z, X, Y, M, Rinv, num_words, Mprime) ; - MBEDTLS_MPI_CHK(ret); -#else - esp_mpi_enable_hardware_hw_op(); - -#if defined (CONFIG_MBEDTLS_MPI_USE_INTERRUPT) - if (esp_mpi_isr_initialise() != ESP_OK) { - ret = -1; - esp_mpi_disable_hardware_hw_op(); - goto cleanup; - } -#endif - - esp_mpi_exp_mpi_mod_hw_op(X, Y, M, Rinv, Mprime, num_words); - ret = mbedtls_mpi_grow(Z, m_words); - if (ret != 0) { - esp_mpi_disable_hardware_hw_op(); - goto cleanup; - } - -#if defined(CONFIG_MBEDTLS_MPI_USE_INTERRUPT) - ret = esp_mpi_wait_intr(); - if (ret != 0) { - esp_mpi_disable_hardware_hw_op(); - goto cleanup; - } -#endif //CONFIG_MBEDTLS_MPI_USE_INTERRUPT - - esp_mpi_read_result_hw_op(Z, m_words); - esp_mpi_disable_hardware_hw_op(); -#endif - - // Compensate for negative X - if (X->MBEDTLS_PRIVATE(s) == -1 && (Y->MBEDTLS_PRIVATE(p[0]) & 1) != 0) { - Z->MBEDTLS_PRIVATE(s) = -1; - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(Z, M, Z)); - } else { - Z->MBEDTLS_PRIVATE(s) = 1; - } - -cleanup: - if (_Rinv == NULL) { - mbedtls_mpi_free(&Rinv_new); - } - return ret; + return( 0 ); } -#endif /* (MBEDTLS_MPI_EXP_MOD_ALT || MBEDTLS_MPI_EXP_MOD_ALT_FALLBACK) */ +/* + * Montgomery reduction: A = A * R^-1 mod N + */ +static int mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N, + mbedtls_mpi_uint mm, const mbedtls_mpi *T ) +{ + mbedtls_mpi_uint z = 1; + mbedtls_mpi U; + + U.n = U.s = (int) z; + U.p = &z; + + return( mpi_montmul( A, &U, N, mm, T ) ); +} /* * Sliding-window exponentiation: X = A^E mod N (HAC 14.85) @@ -458,220 +249,213 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi *_RR ) { int ret; -#if defined(MBEDTLS_MPI_EXP_MOD_ALT_FALLBACK) - /* Try hardware API first and then fallback to software */ - ret = esp_mpi_exp_mod( X, A, E, N, _RR ); - if( ret == MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ) { - ret = mbedtls_mpi_exp_mod_soft( X, A, E, N, _RR ); - } -#else - /* Hardware approach */ - ret = esp_mpi_exp_mod( X, A, E, N, _RR ); + size_t wbits, wsize, one = 1; + size_t i, j, nblimbs; + size_t bufsize, nbits; + mbedtls_mpi_uint ei, mm, state; + mbedtls_mpi RR, T, W[ 2 << MBEDTLS_MPI_WINDOW_SIZE ], Apos; + int neg; + + MPI_VALIDATE_RET( X != NULL ); + MPI_VALIDATE_RET( A != NULL ); + MPI_VALIDATE_RET( E != NULL ); + MPI_VALIDATE_RET( N != NULL ); + + if( mbedtls_mpi_cmp_int( N, 0 ) <= 0 || ( N->p[0] & 1 ) == 0 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + + if( mbedtls_mpi_cmp_int( E, 0 ) < 0 ) + return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + + /* + * Init temps and window size + */ + mpi_montg_init( &mm, N ); + mbedtls_mpi_init( &RR ); mbedtls_mpi_init( &T ); + mbedtls_mpi_init( &Apos ); + memset( W, 0, sizeof( W ) ); + + i = mbedtls_mpi_bitlen( E ); + + wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 : + ( i > 79 ) ? 4 : ( i > 23 ) ? 3 : 1; + +#if( MBEDTLS_MPI_WINDOW_SIZE < 6 ) + if( wsize > MBEDTLS_MPI_WINDOW_SIZE ) + wsize = MBEDTLS_MPI_WINDOW_SIZE; #endif - /* Note: For software only approach, it gets handled in mbedTLS library. - This file is not part of build objects for that case */ - return ret; -} + j = N->n + 1; + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1], j ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) ); -#if defined(MBEDTLS_MPI_MUL_MPI_ALT) /* MBEDTLS_MPI_MUL_MPI_ALT */ - -static int mpi_mult_mpi_failover_mod_mult( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words); -static int mpi_mult_mpi_overlong(mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t y_words, size_t z_words); - -/* Z = X * Y */ -int mbedtls_mpi_mul_mpi( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y ) -{ - int ret = 0; - size_t x_bits = mbedtls_mpi_bitlen(X); - size_t y_bits = mbedtls_mpi_bitlen(Y); - size_t x_words = bits_to_words(x_bits); - size_t y_words = bits_to_words(y_bits); - size_t z_words = bits_to_words(x_bits + y_bits); - size_t hw_words = esp_mpi_hardware_words(MAX(x_words, y_words)); // length of one operand in hardware - - /* Short-circuit eval if either argument is 0 or 1. - - This is needed as the mpi modular division - argument will sometimes call in here when one - argument is too large for the hardware unit, but the other - argument is zero or one. - */ - if (x_bits == 0 || y_bits == 0) { - mbedtls_mpi_lset(Z, 0); - return 0; - } - if (x_bits == 1) { - ret = mbedtls_mpi_copy(Z, Y); - Z->MBEDTLS_PRIVATE(s) *= X->MBEDTLS_PRIVATE(s); - return ret; - } - if (y_bits == 1) { - ret = mbedtls_mpi_copy(Z, X); - Z->MBEDTLS_PRIVATE(s) *= Y->MBEDTLS_PRIVATE(s); - return ret; + /* + * Compensate for negative A (and correct at the end) + */ + neg = ( A->s == -1 ); + if( neg ) + { + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Apos, A ) ); + Apos.s = 1; + A = &Apos; } - /* Grow Z to result size early, avoid interim allocations */ - MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, z_words) ); + /* + * If 1st call, pre-compute R^2 mod N + */ + if( _RR == NULL || _RR->p == NULL ) + { + MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &RR, 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &RR, N->n * 2 * biL ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &RR, &RR, N ) ); - /* If either factor is over 2048 bits, we can't use the standard hardware multiplier - (it assumes result is double longest factor, and result is max 4096 bits.) + if( _RR != NULL ) + memcpy( _RR, &RR, sizeof( mbedtls_mpi ) ); + } + else + memcpy( &RR, _RR, sizeof( mbedtls_mpi ) ); - However, we can fail over to mod_mult for up to 4096 bits of result (modulo - multiplication doesn't have the same restriction, so result is simply the - number of bits in X plus number of bits in in Y.) - */ - if (hw_words * 32 > SOC_RSA_MAX_BIT_LEN/2) { - if (z_words * 32 <= SOC_RSA_MAX_BIT_LEN) { - /* Note: it's possible to use mpi_mult_mpi_overlong - for this case as well, but it's very slightly - slower and requires a memory allocation. - */ - return mpi_mult_mpi_failover_mod_mult(Z, X, Y, z_words); - } else { - /* Still too long for the hardware unit... */ - if (y_words > x_words) { - return mpi_mult_mpi_overlong(Z, X, Y, y_words, z_words); - } else { - return mpi_mult_mpi_overlong(Z, Y, X, x_words, z_words); - } + /* + * W[1] = A * R^2 * R^-1 mod N = A * R mod N + */ + if( mbedtls_mpi_cmp_mpi( A, N ) >= 0 ) + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &W[1], A, N ) ); + else + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[1], A ) ); + + MBEDTLS_MPI_CHK( mpi_montmul( &W[1], &RR, N, mm, &T ) ); + + /* + * X = R^2 * R^-1 mod N = R mod N + */ + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) ); + MBEDTLS_MPI_CHK( mpi_montred( X, N, mm, &T ) ); + + if( wsize > 1 ) + { + /* + * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1) + */ + j = one << ( wsize - 1 ); + + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1] ) ); + + for( i = 0; i < wsize - 1; i++ ) + MBEDTLS_MPI_CHK( mpi_montmul( &W[j], &W[j], N, mm, &T ) ); + + /* + * W[i] = W[i - 1] * W[1] + */ + for( i = j + 1; i < ( one << wsize ); i++ ) + { + MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) ); + + MBEDTLS_MPI_CHK( mpi_montmul( &W[i], &W[1], N, mm, &T ) ); } } - /* Otherwise, we can use the (faster) multiply hardware unit */ - esp_mpi_enable_hardware_hw_op(); + nblimbs = E->n; + bufsize = 0; + nbits = 0; + wbits = 0; + state = 0; - esp_mpi_mul_mpi_hw_op(X, Y, hw_words); - esp_mpi_read_result_hw_op(Z, z_words); + while( 1 ) + { + if( bufsize == 0 ) + { + if( nblimbs == 0 ) + break; - esp_mpi_disable_hardware_hw_op(); + nblimbs--; - Z->MBEDTLS_PRIVATE(s) = X->MBEDTLS_PRIVATE(s) * Y->MBEDTLS_PRIVATE(s); + bufsize = sizeof( mbedtls_mpi_uint ) << 3; + } -cleanup: - return ret; -} + bufsize--; -int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b ) -{ - mbedtls_mpi _B; - mbedtls_mpi_uint p[1]; + ei = (E->p[nblimbs] >> bufsize) & 1; - _B.MBEDTLS_PRIVATE(s) = 1; - _B.MBEDTLS_PRIVATE(n) = 1; - _B.MBEDTLS_PRIVATE(p) = p; - p[0] = b; + /* + * skip leading 0s + */ + if( ei == 0 && state == 0 ) + continue; - return( mbedtls_mpi_mul_mpi( X, A, &_B ) ); -} + if( ei == 0 && state == 1 ) + { + /* + * out of window, square X + */ + MBEDTLS_MPI_CHK( mpi_montmul( X, X, N, mm, &T ) ); + continue; + } -/* Deal with the case when X & Y are too long for the hardware unit, by splitting one operand - into two halves. + /* + * add ei to current window + */ + state = 2; - Y must be the longer operand + nbits++; + wbits |= ( ei << ( wsize - nbits ) ); - Slice Y into Yp, Ypp such that: - Yp = lower 'b' bits of Y - Ypp = upper 'b' bits of Y (right shifted) + if( nbits == wsize ) + { + /* + * X = X^wsize R^-1 mod N + */ + for( i = 0; i < wsize; i++ ) + MBEDTLS_MPI_CHK( mpi_montmul( X, X, N, mm, &T ) ); - Such that - Z = X * Y - Z = X * (Yp + Ypp<MBEDTLS_PRIVATE(p), - .MBEDTLS_PRIVATE(n) = words_slice, - .MBEDTLS_PRIVATE(s) = Y->MBEDTLS_PRIVATE(s) - }; - /* Ypp holds upper bits of Y, right shifted (also reuses Y's array contents) */ - const mbedtls_mpi Ypp = { - .MBEDTLS_PRIVATE(p) = Y->MBEDTLS_PRIVATE(p) + words_slice, - .MBEDTLS_PRIVATE(n) = y_words - words_slice, - .MBEDTLS_PRIVATE(s) = Y->MBEDTLS_PRIVATE(s) - }; - mbedtls_mpi_init(&Ztemp); + state--; + nbits = 0; + wbits = 0; + } + } - /* Get result Ztemp = Yp * X (need temporary variable Ztemp) */ - MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi(&Ztemp, X, &Yp) ); - - /* Z = Ypp * Y */ - MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi(Z, X, &Ypp) ); - - /* Z = Z << b */ - MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l(Z, words_slice * 32) ); - - /* Z += Ztemp */ - MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi(Z, Z, &Ztemp) ); - -cleanup: - mbedtls_mpi_free(&Ztemp); - - return ret; -} - -/* Special-case of mbedtls_mpi_mult_mpi(), where we use hardware montgomery mod - multiplication to calculate an mbedtls_mpi_mult_mpi result where either - A or B are >2048 bits so can't use the standard multiplication method. - - Result (number of words, based on A bits + B bits) must still be less than 4096 bits. - - This case is simpler than the general case modulo multiply of - esp_mpi_mul_mpi_mod() because we can control the other arguments: - - * Modulus is chosen with M=(2^num_bits - 1) (ie M=R-1), so output - * Mprime and Rinv are therefore predictable as follows: - isn't actually modulo anything. - Mprime 1 - Rinv 1 - - (See RSA Accelerator section in Technical Reference for more about Mprime, Rinv) -*/ - -static int mpi_mult_mpi_failover_mod_mult( mbedtls_mpi *Z, const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t z_words) -{ - int ret; - size_t hw_words = esp_mpi_hardware_words(z_words); - - esp_mpi_enable_hardware_hw_op(); - - esp_mpi_mult_mpi_failover_mod_mult_hw_op(X, Y, hw_words ); - MBEDTLS_MPI_CHK( mbedtls_mpi_grow(Z, hw_words) ); - esp_mpi_read_result_hw_op(Z, hw_words); - - Z->MBEDTLS_PRIVATE(s) = X->MBEDTLS_PRIVATE(s) * Y->MBEDTLS_PRIVATE(s); /* - * Relevant: https://github.com/espressif/esp-idf/issues/11850 - * If the first condition fails then most likely hardware peripheral - * has produced an incorrect result for MPI operation. This can - * happen if data fed to the peripheral register was incorrect. - * - * z_words is calculated as the worst-case possible size of the result - * MPI Z. The difference between z_words and the actual words taken by - * the MPI result (mpi_words(Z)) can be a maximum of 1 word. - * The value z_bits (actual bits taken by the MPI result) is calculated - * as x_bits + y_bits bits, however, in some cases, z_bits can be - * x_bits + y_bits - 1 bits (see example below). - * 0b1111 * 0b1111 = 0b11100001 -> 8 bits - * 0b1000 * 0b1000 = 0b01000000 -> 7 bits. - * The code rounds up to the nearest word size, so the maximum difference - * could be of only 1 word. The second condition handles this. + * process the remaining bits */ - assert((z_words >= mpi_words(Z)) && (z_words - mpi_words(Z) <= (size_t)1)); -cleanup: - esp_mpi_disable_hardware_hw_op(); - return ret; -} + for( i = 0; i < nbits; i++ ) + { + MBEDTLS_MPI_CHK( mpi_montmul( X, X, N, mm, &T ) ); -#endif /* MBEDTLS_MPI_MUL_MPI_ALT */ + wbits <<= 1; + + if( ( wbits & ( one << wsize ) ) != 0 ) + MBEDTLS_MPI_CHK( mpi_montmul( X, &W[1], N, mm, &T ) ); + } + + /* + * X = A^E * R * R^-1 mod N = A^E mod N + */ + MBEDTLS_MPI_CHK( mpi_montred( X, N, mm, &T ) ); + + if( neg && E->n != 0 && ( E->p[0] & 1 ) != 0 ) + { + X->s = -1; + MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) ); + } + +cleanup: + + for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ ) + mbedtls_mpi_free( &W[i] ); + + mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos ); + + if( _RR == NULL || _RR->p == NULL ) + mbedtls_mpi_free( &RR ); + + return( ret ); +} +#endif /* MBEDTLS_MPI_EXP_MOD_ALT */ +#endif /* MBEDTLS_BIGNUM_C */ diff --git a/components/mbedtls/mbedtls_v3/port/include/mbedtls/esp_config.h b/components/mbedtls/mbedtls_v3/port/include/mbedtls/esp_config.h index 0ce60511..aa930cb2 100644 --- a/components/mbedtls/mbedtls_v3/port/include/mbedtls/esp_config.h +++ b/components/mbedtls/mbedtls_v3/port/include/mbedtls/esp_config.h @@ -206,9 +206,15 @@ #define MBEDTLS_MPI_MUL_MPI_ALT #else #undef MBEDTLS_MPI_EXP_MOD_ALT_FALLBACK -#undef MBEDTLS_MPI_EXP_MOD_ALT #undef MBEDTLS_MPI_MUL_MPI_ALT +#if defined(CONFIG_MBEDTLS_MPI_EXP_MOD_ALT) +#define MBEDTLS_MPI_EXP_MOD_ALT +#else +#undef MBEDTLS_MPI_EXP_MOD_ALT #endif +#endif + +#define MBEDTLS_MPI_WINDOW_SIZE 6 #ifdef CONFIG_MBEDTLS_ATCA_HW_ECDSA_SIGN #define MBEDTLS_ECDSA_SIGN_ALT