diff --git a/library/bignum.c b/library/bignum.c index d3d72ab747..3926da418a 100644 --- a/library/bignum.c +++ b/library/bignum.c @@ -1683,13 +1683,7 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi *prec_RR) { int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; - size_t window_bitsize; - size_t i, j, nblimbs; - size_t bufsize, nbits; - size_t exponent_bits_in_window = 0; - mbedtls_mpi_uint ei, mm, state; - mbedtls_mpi RR, T, W[(size_t) 1 << MBEDTLS_MPI_WINDOW_SIZE], WW, Apos; - int neg; + mbedtls_mpi RR, T, E_core; if (mbedtls_mpi_cmp_int(N, 0) <= 0 || (N->p[0] & 1) == 0) { return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; @@ -1704,89 +1698,15 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; } - /* - * Init temps and window size - */ - mpi_montg_init(&mm, N); - mbedtls_mpi_init(&RR); mbedtls_mpi_init(&T); - mbedtls_mpi_init(&Apos); - mbedtls_mpi_init(&WW); - memset(W, 0, sizeof(W)); - - i = mbedtls_mpi_bitlen(E); - - window_bitsize = (i > 671) ? 6 : (i > 239) ? 5 : - (i > 79) ? 4 : (i > 23) ? 3 : 1; - -#if (MBEDTLS_MPI_WINDOW_SIZE < 6) - if (window_bitsize > MBEDTLS_MPI_WINDOW_SIZE) { - window_bitsize = MBEDTLS_MPI_WINDOW_SIZE; - } -#endif - - const size_t w_table_used_size = (size_t) 1 << window_bitsize; - - /* - * This function is not constant-trace: its memory accesses depend on the - * exponent value. To defend against timing attacks, callers (such as RSA - * and DHM) should use exponent blinding. However this is not enough if the - * adversary can find the exponent in a single trace, so this function - * takes extra precautions against adversaries who can observe memory - * access patterns. - * - * This function performs a series of multiplications by table elements and - * squarings, and we want the prevent the adversary from finding out which - * table element was used, and from distinguishing between multiplications - * and squarings. Firstly, when multiplying by an element of the window - * W[i], we do a constant-trace table lookup to obfuscate i. This leaves - * squarings as having a different memory access patterns from other - * multiplications. So secondly, we put the accumulator in the table as - * well, and also do a constant-trace table lookup to multiply by the - * accumulator which is W[x_index]. - * - * This way, all multiplications take the form of a lookup-and-multiply. - * The number of lookup-and-multiply operations inside each iteration of - * the main loop still depends on the bits of the exponent, but since the - * other operations in the loop don't have an easily recognizable memory - * trace, an adversary is unlikely to be able to observe the exact - * patterns. - * - * An adversary may still be able to recover the exponent if they can - * observe both memory accesses and branches. However, branch prediction - * exploitation typically requires many traces of execution over the same - * data, which is defeated by randomized blinding. - */ - const size_t x_index = 0; - mbedtls_mpi_init(&W[x_index]); - - j = N->n + 1; - /* All W[i] including the accumulator must have at least N->n limbs for - * the mpi_montmul() and mpi_montred() calls later. Here we ensure that - * W[1] and the accumulator W[x_index] are large enough. later we'll grow - * other W[i] to the same length. They must not be shrunk midway through - * this function! - */ - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[x_index], j)); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1], j)); - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T, j * 2)); - - /* - * Compensate for negative A (and correct at the end) - */ - neg = (A->s == -1); - if (neg) { - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Apos, A)); - Apos.s = 1; - A = &Apos; - } + mbedtls_mpi_init(&RR); + mbedtls_mpi_init(&T); + mbedtls_mpi_init(&E_core); /* * If 1st call, pre-compute R^2 mod N */ if (prec_RR == NULL || prec_RR->p == NULL) { - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&RR, 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&RR, N->n * 2 * biL)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&RR, &RR, N)); + MBEDTLS_MPI_CHK(mbedtls_mpi_core_get_mont_r2_unsafe(&RR, N)); if (prec_RR != NULL) { memcpy(prec_RR, &RR, sizeof(mbedtls_mpi)); @@ -1796,175 +1716,68 @@ int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A, } /* - * W[1] = A * R^2 * R^-1 mod N = A * R mod N + * Ensure that the exponent that we are passing to the core is not NULL. */ - if (mbedtls_mpi_cmp_mpi(A, N) >= 0) { - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&W[1], A, N)); - /* This should be a no-op because W[1] is already that large before - * mbedtls_mpi_mod_mpi(), but it's necessary to avoid an overflow - * in mpi_montmul() below, so let's make sure. */ - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1], N->n + 1)); + if (E->n == 0) { + mbedtls_mpi_lset(&E_core, 0); } else { - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[1], A)); - } - - /* Note that this is safe because W[1] always has at least N->n limbs - * (it grew above and was preserved by mbedtls_mpi_copy()). */ - mpi_montmul(&W[1], &RR, N, mm, &T); - - /* - * W[x_index] = R^2 * R^-1 mod N = R mod N - */ - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[x_index], &RR)); - mpi_montred(&W[x_index], N, mm, &T); - - - if (window_bitsize > 1) { - /* - * W[i] = W[1] ^ i - * - * The first bit of the sliding window is always 1 and therefore we - * only need to store the second half of the table. - * - * (There are two special elements in the table: W[0] for the - * accumulator/result and W[1] for A in Montgomery form. Both of these - * are already set at this point.) - */ - j = w_table_used_size / 2; - - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[j], N->n + 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[j], &W[1])); - - for (i = 0; i < window_bitsize - 1; i++) { - mpi_montmul(&W[j], &W[j], N, mm, &T); - } - - /* - * W[i] = W[i - 1] * W[1] - */ - for (i = j + 1; i < w_table_used_size; i++) { - MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[i], N->n + 1)); - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[i], &W[i - 1])); - - mpi_montmul(&W[i], &W[1], N, mm, &T); - } - } - - nblimbs = E->n; - bufsize = 0; - nbits = 0; - state = 0; - - while (1) { - if (bufsize == 0) { - if (nblimbs == 0) { - break; - } - - nblimbs--; - - bufsize = sizeof(mbedtls_mpi_uint) << 3; - } - - bufsize--; - - ei = (E->p[nblimbs] >> bufsize) & 1; - - /* - * skip leading 0s - */ - if (ei == 0 && state == 0) { - continue; - } - - if (ei == 0 && state == 1) { - /* - * out of window, square W[x_index] - */ - MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, x_index)); - mpi_montmul(&W[x_index], &WW, N, mm, &T); - continue; - } - - /* - * add ei to current window - */ - state = 2; - - nbits++; - exponent_bits_in_window |= (ei << (window_bitsize - nbits)); - - if (nbits == window_bitsize) { - /* - * W[x_index] = W[x_index]^window_bitsize R^-1 mod N - */ - for (i = 0; i < window_bitsize; i++) { - MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, - x_index)); - mpi_montmul(&W[x_index], &WW, N, mm, &T); - } - - /* - * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N - */ - MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, - exponent_bits_in_window)); - mpi_montmul(&W[x_index], &WW, N, mm, &T); - - state--; - nbits = 0; - exponent_bits_in_window = 0; - } + memcpy(&E_core, E, sizeof(mbedtls_mpi)); } /* - * process the remaining bits + * To preserve constness we need to make a copy of A. Using X for this to + * save memory. */ - for (i = 0; i < nbits; i++) { - MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, x_index)); - mpi_montmul(&W[x_index], &WW, N, mm, &T); - - exponent_bits_in_window <<= 1; - - if ((exponent_bits_in_window & ((size_t) 1 << window_bitsize)) != 0) { - MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, 1)); - mpi_montmul(&W[x_index], &WW, N, mm, &T); - } - } + MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, A)); /* - * W[x_index] = A^E * R * R^-1 mod N = A^E mod N + * Compensate for negative A (and correct at the end). */ - mpi_montred(&W[x_index], N, mm, &T); - - if (neg && E->n != 0 && (E->p[0] & 1) != 0) { - W[x_index].s = -1; - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&W[x_index], N, &W[x_index])); - } + X->s = 1; /* - * Load the result in the output variable. + * Make sure that A has exactly as many limbs as N. */ - MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &W[x_index])); + if (mbedtls_mpi_cmp_mpi(X, N) >= 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N)); + } + MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, N->n)); + + /* + * Allocate working memory for mbedtls_mpi_core_exp_mod() + */ + MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T, + mbedtls_mpi_core_exp_mod_working_limbs(N->n, E_core.n))); + + /* + * Convert to and from Montgomery around mbedtls_mpi_core_exp_mod(). + */ + mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N->p); + mbedtls_mpi_core_to_mont_rep(X->p, X->p, N->p, N->n, mm, RR.p, T.p); + mbedtls_mpi_core_exp_mod(X->p, X->p, N->p, N->n, E_core.p, E_core.n, RR.p, + T.p); + mbedtls_mpi_core_from_mont_rep(X->p, X->p, N->p, N->n, mm, T.p); + + /* + * Correct for negative A. + */ + if (A->s == -1 && (E_core.p[0] & 1) != 0) { + X->s = -1; + MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(X, N, X)); + } cleanup: - /* The first bit of the sliding window is always 1 and therefore the first - * half of the table was unused. */ - for (i = w_table_used_size/2; i < w_table_used_size; i++) { - mbedtls_mpi_free(&W[i]); - } - - mbedtls_mpi_free(&W[x_index]); - mbedtls_mpi_free(&W[1]); mbedtls_mpi_free(&T); - mbedtls_mpi_free(&Apos); - mbedtls_mpi_free(&WW); if (prec_RR == NULL || prec_RR->p == NULL) { mbedtls_mpi_free(&RR); } + if (E->n == 0) { + mbedtls_mpi_free(&E_core); + } + return ret; }