From c6d0f607edfaf49ca56fe5d2b733a243495a23f3 Mon Sep 17 00:00:00 2001
From: K1 <dongbeiouba@gmail.com>
Date: Wed, 6 Nov 2024 17:37:26 +0800
Subject: [PATCH 1/4] Revert "Fix Timing Oracle in RSA decryption"

This reverts commit 14cb437f5b946943e0aa0270cab55f0c726cdd6e.

(Merged from https://github.com/openssl/openssl/pull/20284)
---
 crypto/bn/bn_blind.c    |  14 +
 crypto/bn/bn_err.c      |   2 -
 crypto/bn/bn_local.h    |  14 -
 crypto/bn/build.info    |   3 +-
 crypto/bn/rsa_sup_mul.c | 614 ----------------------------------------
 crypto/err/openssl.txt  |   1 -
 crypto/rsa/rsa_ossl.c   |  17 +-
 include/crypto/bn.h     |   5 -
 include/openssl/bnerr.h |   1 -
 9 files changed, 19 insertions(+), 652 deletions(-)
 delete mode 100644 crypto/bn/rsa_sup_mul.c

diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index 6e9d23932..76fc7ebcf 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -13,6 +13,20 @@
 
 #define BN_BLINDING_COUNTER     32
 
+struct bn_blinding_st {
+    BIGNUM *A;
+    BIGNUM *Ai;
+    BIGNUM *e;
+    BIGNUM *mod;                /* just a reference */
+    CRYPTO_THREAD_ID tid;
+    int counter;
+    unsigned long flags;
+    BN_MONT_CTX *m_ctx;
+    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
+                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
+    CRYPTO_RWLOCK *lock;
+};
+
 BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
 {
     BN_BLINDING *ret = NULL;
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
index 6e155139b..398e75b43 100644
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -73,8 +73,6 @@ static const ERR_STRING_DATA BN_str_functs[] = {
     {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
     {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
     {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
-    {ERR_PACK(ERR_LIB_BN, BN_F_OSSL_BN_RSA_DO_UNBLIND, 0),
-    "ossl_bn_rsa_do_unblind"},
     {0, NULL}
 };
 
diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
index 096513533..8ad69ccd3 100644
--- a/crypto/bn/bn_local.h
+++ b/crypto/bn/bn_local.h
@@ -263,20 +263,6 @@ struct bn_gencb_st {
     } cb;
 };
 
-struct bn_blinding_st {
-    BIGNUM *A;
-    BIGNUM *Ai;
-    BIGNUM *e;
-    BIGNUM *mod;                /* just a reference */
-    CRYPTO_THREAD_ID tid;
-    int counter;
-    unsigned long flags;
-    BN_MONT_CTX *m_ctx;
-    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
-                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
-    CRYPTO_RWLOCK *lock;
-};
-
 /*-
  * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
  *
diff --git a/crypto/bn/build.info b/crypto/bn/build.info
index c9fe2fdad..b9ed5322f 100644
--- a/crypto/bn/build.info
+++ b/crypto/bn/build.info
@@ -5,8 +5,7 @@ SOURCE[../../libcrypto]=\
         bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
         {- $target{bn_asm_src} -} \
         bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
-        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c \
-        rsa_sup_mul.c
+        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c
 
 INCLUDE[bn_exp.o]=..
 
diff --git a/crypto/bn/rsa_sup_mul.c b/crypto/bn/rsa_sup_mul.c
deleted file mode 100644
index acafefd5f..000000000
--- a/crypto/bn/rsa_sup_mul.c
+++ /dev/null
@@ -1,614 +0,0 @@
-#include <openssl/e_os2.h>
-#include <stddef.h>
-#include <sys/types.h>
-#include <string.h>
-#include <openssl/bn.h>
-#include <openssl/err.h>
-#include <openssl/rsaerr.h>
-#include "internal/numbers.h"
-#include "internal/constant_time.h"
-#include "bn_local.h"
-
-# if BN_BYTES == 8
-typedef uint64_t limb_t;
-#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
-/* nonstandard; implemented by gcc on 64-bit platforms */
-typedef __uint128_t limb2_t;
-#   define HAVE_LIMB2_T
-#  endif
-#  define LIMB_BIT_SIZE 64
-#  define LIMB_BYTE_SIZE 8
-# elif BN_BYTES == 4
-typedef uint32_t limb_t;
-typedef uint64_t limb2_t;
-#  define LIMB_BIT_SIZE 32
-#  define LIMB_BYTE_SIZE 4
-#  define HAVE_LIMB2_T
-# else
-#  error "Not supported"
-# endif
-
-/*
- * For multiplication we're using schoolbook multiplication,
- * so if we have two numbers, each with 6 "digits" (words)
- * the multiplication is calculated as follows:
- *                        A B C D E F
- *                     x  I J K L M N
- *                     --------------
- *                                N*F
- *                              N*E
- *                            N*D
- *                          N*C
- *                        N*B
- *                      N*A
- *                              M*F
- *                            M*E
- *                          M*D
- *                        M*C
- *                      M*B
- *                    M*A
- *                            L*F
- *                          L*E
- *                        L*D
- *                      L*C
- *                    L*B
- *                  L*A
- *                          K*F
- *                        K*E
- *                      K*D
- *                    K*C
- *                  K*B
- *                K*A
- *                        J*F
- *                      J*E
- *                    J*D
- *                  J*C
- *                J*B
- *              J*A
- *                      I*F
- *                    I*E
- *                  I*D
- *                I*C
- *              I*B
- *         +  I*A
- *         ==========================
- *                        N*B N*D N*F
- *                    + N*A N*C N*E
- *                    + M*B M*D M*F
- *                  + M*A M*C M*E
- *                  + L*B L*D L*F
- *                + L*A L*C L*E
- *                + K*B K*D K*F
- *              + K*A K*C K*E
- *              + J*B J*D J*F
- *            + J*A J*C J*E
- *            + I*B I*D I*F
- *          + I*A I*C I*E
- *
- *                1+1 1+3 1+5
- *              1+0 1+2 1+4
- *              0+1 0+3 0+5
- *            0+0 0+2 0+4
- *
- *            0 1 2 3 4 5 6
- * which requires n^2 multiplications and 2n full length additions
- * as we can keep every other result of limb multiplication in two separate
- * limbs
- */
-
-#if defined HAVE_LIMB2_T
-static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
-{
-    limb2_t t;
-    /*
-     * this is idiomatic code to tell compiler to use the native mul
-     * those three lines will actually compile to single instruction
-     */
-
-    t = (limb2_t)a * b;
-    *hi = t >> LIMB_BIT_SIZE;
-    *lo = (limb_t)t;
-}
-#elif (BN_BYTES == 8) && (defined _MSC_VER)
-/* https://learn.microsoft.com/en-us/cpp/intrinsics/umul128?view=msvc-170 */
-#pragma intrinsic(_umul128)
-static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
-{
-    *lo = _umul128(a, b, hi);
-}
-#else
-/*
- * if the compiler doesn't have either a 128bit data type nor a "return
- * high 64 bits of multiplication"
- */
-static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
-{
-    limb_t a_low = (limb_t)(uint32_t)a;
-    limb_t a_hi = a >> 32;
-    limb_t b_low = (limb_t)(uint32_t)b;
-    limb_t b_hi = b >> 32;
-
-    limb_t p0 = a_low * b_low;
-    limb_t p1 = a_low * b_hi;
-    limb_t p2 = a_hi * b_low;
-    limb_t p3 = a_hi * b_hi;
-
-    uint32_t cy = (uint32_t)(((p0 >> 32) + (uint32_t)p1 + (uint32_t)p2) >> 32);
-
-    *lo = p0 + (p1 << 32) + (p2 << 32);
-    *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy;
-}
-#endif
-
-/* add two limbs with carry in, return carry out */
-static ossl_inline limb_t _add_limb(limb_t *ret, limb_t a, limb_t b, limb_t carry)
-{
-    limb_t carry1, carry2, t;
-    /*
-     * `c = a + b; if (c < a)` is idiomatic code that makes compilers
-     * use add with carry on assembly level
-     */
-
-    *ret = a + carry;
-    if (*ret < a)
-        carry1 = 1;
-    else
-        carry1 = 0;
-
-    t = *ret;
-    *ret = t + b;
-    if (*ret < t)
-        carry2 = 1;
-    else
-        carry2 = 0;
-
-    return carry1 + carry2;
-}
-
-/*
- * add two numbers of the same size, return overflow
- *
- * add a to b, place result in ret; all arrays need to be n limbs long
- * return overflow from addition (0 or 1)
- */
-static ossl_inline limb_t add(limb_t *ret, limb_t *a, limb_t *b, size_t n)
-{
-    limb_t c = 0;
-    ossl_ssize_t i;
-
-    for(i = n - 1; i > -1; i--)
-        c = _add_limb(&ret[i], a[i], b[i], c);
-
-    return c;
-}
-
-/*
- * return number of limbs necessary for temporary values
- * when multiplying numbers n limbs large
- */
-static ossl_inline size_t mul_limb_numb(size_t n)
-{
-    return  2 * n * 2;
-}
-
-/*
- * multiply two numbers of the same size
- *
- * multiply a by b, place result in ret; a and b need to be n limbs long
- * ret needs to be 2*n limbs long, tmp needs to be mul_limb_numb(n) limbs
- * long
- */
-static void limb_mul(limb_t *ret, limb_t *a, limb_t *b, size_t n, limb_t *tmp)
-{
-    limb_t *r_odd, *r_even;
-    size_t i, j, k;
-
-    r_odd = tmp;
-    r_even = &tmp[2 * n];
-
-    memset(ret, 0, 2 * n * sizeof(limb_t));
-
-    for (i = 0; i < n; i++) {
-        for (k = 0; k < i + n + 1; k++) {
-            r_even[k] = 0;
-            r_odd[k] = 0;
-        }
-        for (j = 0; j < n; j++) {
-            /*
-             * place results from even and odd limbs in separate arrays so that
-             * we don't have to calculate overflow every time we get individual
-             * limb multiplication result
-             */
-            if (j % 2 == 0)
-                _mul_limb(&r_even[i + j], &r_even[i + j + 1], a[i], b[j]);
-            else
-                _mul_limb(&r_odd[i + j], &r_odd[i + j + 1], a[i], b[j]);
-        }
-        /*
-         * skip the least significant limbs when adding multiples of
-         * more significant limbs (they're zero anyway)
-         */
-        add(ret, ret, r_even, n + i + 1);
-        add(ret, ret, r_odd, n + i + 1);
-    }
-}
-
-/* modifies the value in place by performing a right shift by one bit */
-static ossl_inline void rshift1(limb_t *val, size_t n)
-{
-    limb_t shift_in = 0, shift_out = 0;
-    size_t i;
-
-    for (i = 0; i < n; i++) {
-        shift_out = val[i] & 1;
-        val[i] = shift_in << (LIMB_BIT_SIZE - 1) | (val[i] >> 1);
-        shift_in = shift_out;
-    }
-}
-
-/* extend the LSB of flag to all bits of limb */
-static ossl_inline limb_t mk_mask(limb_t flag)
-{
-    flag |= flag << 1;
-    flag |= flag << 2;
-    flag |= flag << 4;
-    flag |= flag << 8;
-    flag |= flag << 16;
-#if (LIMB_BYTE_SIZE == 8)
-    flag |= flag << 32;
-#endif
-    return flag;
-}
-
-/*
- * copy from either a or b to ret based on flag
- * when flag == 0, then copies from b
- * when flag == 1, then copies from a
- */
-static ossl_inline void cselect(limb_t flag, limb_t *ret, limb_t *a, limb_t *b, size_t n)
-{
-    /*
-     * would be more efficient with non volatile mask, but then gcc
-     * generates code with jumps
-     */
-    volatile limb_t mask;
-    size_t i;
-
-    mask = mk_mask(flag);
-    for (i = 0; i < n; i++) {
-#if (LIMB_BYTE_SIZE == 8)
-        ret[i] = constant_time_select_64(mask, a[i], b[i]);
-#else
-        ret[i] = constant_time_select_32(mask, a[i], b[i]);
-#endif
-    }
-}
-
-static limb_t _sub_limb(limb_t *ret, limb_t a, limb_t b, limb_t borrow)
-{
-    limb_t borrow1, borrow2, t;
-    /*
-     * while it doesn't look constant-time, this is idiomatic code
-     * to tell compilers to use the carry bit from subtraction
-     */
-
-    *ret = a - borrow;
-    if (*ret > a)
-        borrow1 = 1;
-    else
-        borrow1 = 0;
-
-    t = *ret;
-    *ret = t - b;
-    if (*ret > t)
-        borrow2 = 1;
-    else
-        borrow2 = 0;
-
-    return borrow1 + borrow2;
-}
-
-/*
- * place the result of a - b into ret, return the borrow bit.
- * All arrays need to be n limbs long
- */
-static limb_t sub(limb_t *ret, limb_t *a, limb_t *b, size_t n)
-{
-    limb_t borrow = 0;
-    ossl_ssize_t i;
-
-    for (i = n - 1; i > -1; i--)
-        borrow = _sub_limb(&ret[i], a[i], b[i], borrow);
-
-    return borrow;
-}
-
-/* return the number of limbs necessary to allocate for the mod() tmp operand */
-static ossl_inline size_t mod_limb_numb(size_t anum, size_t modnum)
-{
-    return (anum + modnum) * 3;
-}
-
-/*
- * calculate a % mod, place the result in ret
- * size of a is defined by anum, size of ret and mod is modnum,
- * size of tmp is returned by mod_limb_numb()
- */
-static void mod(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
-               size_t modnum, limb_t *tmp)
-{
-    limb_t *atmp, *modtmp, *rettmp;
-    limb_t res;
-    size_t i;
-
-    memset(tmp, 0, mod_limb_numb(anum, modnum) * LIMB_BYTE_SIZE);
-
-    atmp = tmp;
-    modtmp = &tmp[anum + modnum];
-    rettmp = &tmp[(anum + modnum) * 2];
-
-    for (i = modnum; i <modnum + anum; i++)
-        atmp[i] = a[i-modnum];
-
-    for (i = 0; i < modnum; i++)
-        modtmp[i] = mod[i];
-
-    for (i = 0; i < anum * LIMB_BIT_SIZE; i++) {
-        rshift1(modtmp, anum + modnum);
-        res = sub(rettmp, atmp, modtmp, anum+modnum);
-        cselect(res, atmp, atmp, rettmp, anum+modnum);
-    }
-
-    memcpy(ret, &atmp[anum], sizeof(limb_t) * modnum);
-}
-
-/* necessary size of tmp for a _mul_add_limb() call with provided anum */
-static ossl_inline size_t _mul_add_limb_numb(size_t anum)
-{
-    return 2 * (anum + 1);
-}
-
-/* multiply a by m, add to ret, return carry */
-static limb_t _mul_add_limb(limb_t *ret, limb_t *a, size_t anum,
-                           limb_t m, limb_t *tmp)
-{
-    limb_t carry = 0;
-    limb_t *r_odd, *r_even;
-    size_t i;
-
-    memset(tmp, 0, sizeof(limb_t) * (anum + 1) * 2);
-
-    r_odd = tmp;
-    r_even = &tmp[anum + 1];
-
-    for (i = 0; i < anum; i++) {
-        /*
-         * place the results from even and odd limbs in separate arrays
-         * so that we have to worry about carry just once
-         */
-        if (i % 2 == 0)
-            _mul_limb(&r_even[i], &r_even[i + 1], a[i], m);
-        else
-            _mul_limb(&r_odd[i], &r_odd[i + 1], a[i], m);
-    }
-    /* assert: add() carry here will be equal zero */
-    add(r_even, r_even, r_odd, anum + 1);
-    /*
-     * while here it will not overflow as the max value from multiplication
-     * is -2 while max overflow from addition is 1, so the max value of
-     * carry is -1 (i.e. max int)
-     */
-    carry = add(ret, ret, &r_even[1], anum) + r_even[0];
-
-    return carry;
-}
-
-static ossl_inline size_t mod_montgomery_limb_numb(size_t modnum)
-{
-    return modnum * 2 + _mul_add_limb_numb(modnum);
-}
-
-/*
- * calculate a % mod, place result in ret
- * assumes that a is in Montgomery form with the R (Montgomery modulus) being
- * smallest power of two big enough to fit mod and that's also a power
- * of the count of number of bits in limb_t (B).
- * For calculation, we also need n', such that mod * n' == -1 mod B.
- * anum must be <= 2 * modnum
- * ret needs to be modnum words long
- * tmp needs to be mod_montgomery_limb_numb(modnum) limbs long
- */
-static void mod_montgomery(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
-                          size_t modnum, limb_t ni0, limb_t *tmp)
-{
-    limb_t carry, v;
-    limb_t *res, *rp, *tmp2;
-    ossl_ssize_t i;
-
-    res = tmp;
-    /*
-     * for intermediate result we need an integer twice as long as modulus
-     * but keep the input in the least significant limbs
-     */
-    memset(res, 0, sizeof(limb_t) * (modnum * 2));
-    memcpy(&res[modnum * 2 - anum], a, sizeof(limb_t) * anum);
-    rp = &res[modnum];
-    tmp2 = &res[modnum * 2];
-
-    carry = 0;
-
-    /* add multiples of the modulus to the value until R divides it cleanly */
-    for (i = modnum; i > 0; i--, rp--) {
-        v = _mul_add_limb(rp, mod, modnum, rp[modnum - 1] * ni0, tmp2);
-        v = v + carry + rp[-1];
-        carry |= (v != rp[-1]);
-        carry &= (v <= rp[-1]);
-        rp[-1] = v;
-    }
-
-    /* perform the final reduction by mod... */
-    carry -= sub(ret, rp, mod, modnum);
-
-    /* ...conditionally */
-    cselect(carry, ret, rp, ret, modnum);
-}
-
-/* allocated buffer should be freed afterwards */
-static void BN_to_limb(const BIGNUM *bn, limb_t *buf, size_t limbs)
-{
-    int i;
-    int real_limbs = (BN_num_bytes(bn) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
-    limb_t *ptr = buf + (limbs - real_limbs);
-
-    for (i = 0; i < real_limbs; i++)
-         ptr[i] = bn->d[real_limbs - i - 1];
-}
-
-#if LIMB_BYTE_SIZE == 8
-static ossl_inline uint64_t be64(uint64_t host)
-{
-    const union {
-        long one;
-        char little;
-    } is_endian = { 1 };
-
-    if (is_endian.little) {
-        uint64_t big = 0;
-
-        big |= (host & 0xff00000000000000) >> 56;
-        big |= (host & 0x00ff000000000000) >> 40;
-        big |= (host & 0x0000ff0000000000) >> 24;
-        big |= (host & 0x000000ff00000000) >>  8;
-        big |= (host & 0x00000000ff000000) <<  8;
-        big |= (host & 0x0000000000ff0000) << 24;
-        big |= (host & 0x000000000000ff00) << 40;
-        big |= (host & 0x00000000000000ff) << 56;
-        return big;
-    } else {
-        return host;
-    }
-}
-
-#else
-/* Not all platforms have htobe32(). */
-static ossl_inline uint32_t be32(uint32_t host)
-{
-    const union {
-        long one;
-        char little;
-    } is_endian = { 1 };
-
-    if (is_endian.little) {
-        uint32_t big = 0;
-
-        big |= (host & 0xff000000) >> 24;
-        big |= (host & 0x00ff0000) >> 8;
-        big |= (host & 0x0000ff00) << 8;
-        big |= (host & 0x000000ff) << 24;
-        return big;
-    } else {
-        return host;
-    }
-}
-#endif
-
-/*
- * We assume that intermediate, possible_arg2, blinding, and ctx are used
- * similar to BN_BLINDING_invert_ex() arguments.
- * to_mod is RSA modulus.
- * buf and num is the serialization buffer and its length.
- *
- * Here we use classic/Montgomery multiplication and modulo. After the calculation finished
- * we serialize the new structure instead of BIGNUMs taking endianness into account.
- */
-int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
-                           const BN_BLINDING *blinding,
-                           const BIGNUM *possible_arg2,
-                           const BIGNUM *to_mod, BN_CTX *ctx,
-                           unsigned char *buf, int num)
-{
-    limb_t *l_im = NULL, *l_mul = NULL, *l_mod = NULL;
-    limb_t *l_ret = NULL, *l_tmp = NULL, l_buf;
-    size_t l_im_count = 0, l_mul_count = 0, l_size = 0, l_mod_count = 0;
-    size_t l_tmp_count = 0;
-    int ret = 0;
-    size_t i;
-    unsigned char *tmp;
-    const BIGNUM *arg1 = intermediate;
-    const BIGNUM *arg2 = (possible_arg2 == NULL) ? blinding->Ai : possible_arg2;
-
-    l_im_count  = (BN_num_bytes(arg1)   + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
-    l_mul_count = (BN_num_bytes(arg2)   + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
-    l_mod_count = (BN_num_bytes(to_mod) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
-
-    l_size = l_im_count > l_mul_count ? l_im_count : l_mul_count;
-    l_im  = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
-    l_mul = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
-    l_mod = OPENSSL_zalloc(l_mod_count * LIMB_BYTE_SIZE);
-
-    if ((l_im == NULL) || (l_mul == NULL) || (l_mod == NULL))
-        goto err;
-
-    BN_to_limb(arg1,   l_im,  l_size);
-    BN_to_limb(arg2,   l_mul, l_size);
-    BN_to_limb(to_mod, l_mod, l_mod_count);
-
-    l_ret = OPENSSL_malloc(2 * l_size * LIMB_BYTE_SIZE);
-
-    if (blinding->m_ctx != NULL) {
-        l_tmp_count = mul_limb_numb(l_size) > mod_montgomery_limb_numb(l_mod_count) ?
-                      mul_limb_numb(l_size) : mod_montgomery_limb_numb(l_mod_count);
-        l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
-    } else {
-        l_tmp_count = mul_limb_numb(l_size) > mod_limb_numb(2 * l_size, l_mod_count) ?
-                      mul_limb_numb(l_size) : mod_limb_numb(2 * l_size, l_mod_count);
-        l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
-    }
-
-    if ((l_ret == NULL) || (l_tmp == NULL))
-        goto err;
-
-    if (blinding->m_ctx != NULL) {
-        limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
-        mod_montgomery(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count,
-                       blinding->m_ctx->n0[0], l_tmp);
-    } else {
-        limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
-        mod(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count, l_tmp);
-    }
-
-    /* modulus size in bytes can be equal to num but after limbs conversion it becomes bigger */
-    if (num < BN_num_bytes(to_mod)) {
-        BNerr(BN_F_OSSL_BN_RSA_DO_UNBLIND, ERR_R_PASSED_INVALID_ARGUMENT);
-        goto err;
-    }
-
-    memset(buf, 0, num);
-    tmp = buf + num - BN_num_bytes(to_mod);
-    for (i = 0; i < l_mod_count; i++) {
-#if LIMB_BYTE_SIZE == 8
-        l_buf = be64(l_ret[i]);
-#else
-        l_buf = be32(l_ret[i]);
-#endif
-        if (i == 0) {
-            int delta = LIMB_BYTE_SIZE - ((l_mod_count * LIMB_BYTE_SIZE) - num);
-
-            memcpy(tmp, ((char *)&l_buf) + LIMB_BYTE_SIZE - delta, delta);
-            tmp += delta;
-        } else {
-            memcpy(tmp, &l_buf, LIMB_BYTE_SIZE);
-            tmp += LIMB_BYTE_SIZE;
-        }
-    }
-    ret = num;
-
- err:
-    OPENSSL_free(l_im);
-    OPENSSL_free(l_mul);
-    OPENSSL_free(l_mod);
-    OPENSSL_free(l_tmp);
-    OPENSSL_free(l_ret);
-
-    return ret;
-}
diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt
index ea3ca7242..e56ee847c 100644
--- a/crypto/err/openssl.txt
+++ b/crypto/err/openssl.txt
@@ -234,7 +234,6 @@ BN_F_BN_RSHIFT:146:BN_rshift
 BN_F_BN_SET_WORDS:144:bn_set_words
 BN_F_BN_STACK_PUSH:148:BN_STACK_push
 BN_F_BN_USUB:115:BN_usub
-BN_F_OSSL_BN_RSA_DO_UNBLIND:151:ossl_bn_rsa_do_unblind
 BUF_F_BUF_MEM_GROW:100:BUF_MEM_grow
 BUF_F_BUF_MEM_GROW_CLEAN:105:BUF_MEM_grow_clean
 BUF_F_BUF_MEM_NEW:101:BUF_MEM_new
diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
index e7cd60ad9..8078bb706 100644
--- a/crypto/rsa/rsa_ossl.c
+++ b/crypto/rsa/rsa_ossl.c
@@ -465,20 +465,11 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
         BN_free(d);
     }
 
-    if (blinding) {
-        /*
-         * ossl_bn_rsa_do_unblind() combines blinding inversion and
-         * 0-padded BN BE serialization
-         */
-        j = ossl_bn_rsa_do_unblind(ret, blinding, unblind, rsa->n, ctx,
-                                   buf, num);
-        if (j == 0)
-            goto err;
-    } else {
-        j = BN_bn2binpad(ret, buf, num);
-        if (j < 0)
+    if (blinding)
+        if (!rsa_blinding_invert(blinding, ret, unblind, ctx))
             goto err;
-    }
+
+    j = BN_bn2binpad(ret, buf, num);
 
     switch (padding) {
     case RSA_PKCS1_PADDING:
diff --git a/include/crypto/bn.h b/include/crypto/bn.h
index b5f36fb25..60afda1da 100644
--- a/include/crypto/bn.h
+++ b/include/crypto/bn.h
@@ -86,10 +86,5 @@ int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
 int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
 int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
                      const BIGNUM *d, BN_CTX *ctx);
-int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
-                           const BN_BLINDING *blinding,
-                           const BIGNUM *possible_arg2,
-                           const BIGNUM *to_mod, BN_CTX *ctx,
-                           unsigned char *buf, int num);
 
 #endif
diff --git a/include/openssl/bnerr.h b/include/openssl/bnerr.h
index 0c8ed6437..92e5296b2 100644
--- a/include/openssl/bnerr.h
+++ b/include/openssl/bnerr.h
@@ -70,7 +70,6 @@ int ERR_load_BN_strings(void);
 # define BN_F_BN_SET_WORDS                                144
 # define BN_F_BN_STACK_PUSH                               148
 # define BN_F_BN_USUB                                     115
-# define BN_F_OSSL_BN_RSA_DO_UNBLIND                      151
 
 /*
  * BN reason codes.

From fec04dc381af833008b185fe6fa11a5216b826b0 Mon Sep 17 00:00:00 2001
From: K1 <dongbeiouba@gmail.com>
Date: Wed, 6 Nov 2024 17:40:33 +0800
Subject: [PATCH 2/4] Alternative fix for CVE-2022-4304

This is about a timing leak in the topmost limb
of the internal result of RSA_private_decrypt,
before the padding check.

There are in fact at least three bugs together that
caused the timing leak:

First and probably most important is the fact that
the blinding did not use the constant time code path
at all when the RSA object was used for a private
decrypt, due to the fact that the Montgomery context
rsa->_method_mod_n was not set up early enough in
rsa_ossl_private_decrypt, when BN_BLINDING_create_param
needed it, and that was persisted as blinding->m_ctx,
although the RSA object creates the Montgomery context
just a bit later.

Then the infamous bn_correct_top was used on the
secret value right after the blinding was removed.

And finally the function BN_bn2binpad did not use
the constant-time code path since the BN_FLG_CONSTTIME
was not set on the secret value.

In order to address the first problem, this patch
makes sure that the rsa->_method_mod_n is initialized
right before the blinding context.

And to fix the second problem, we add a new utility
function bn_correct_top_consttime, a const-time
variant of bn_correct_top.

Together with the fact, that BN_bn2binpad is already
constant time if the flag BN_FLG_CONSTTIME is set,
this should eliminate the timing oracle completely.

In addition the no-asm variant may also have
branches that depend on secret values, because the last
invocation of bn_sub_words in bn_from_montgomery_word
had branches when the function is compiled by certain
gcc compiler versions, due to the clumsy coding style.

So additionally this patch stream-lined the no-asm
C-code in order to avoid branches where possible and
improve the resulting code quality.
---
 crypto/bn/bn_asm.c    | 106 +++++++++++++++++++++++-------------------
 crypto/bn/bn_blind.c  |   3 +-
 crypto/bn/bn_lib.c    |  22 +++++++++
 crypto/bn/bn_local.h  |  26 +++++------
 crypto/rsa/rsa_ossl.c |  15 +++---
 5 files changed, 103 insertions(+), 69 deletions(-)

diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index e87261941..95e39cbdb 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -381,25 +381,33 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 #ifndef OPENSSL_SMALL_FOOTPRINT
     while (n & ~3) {
         t1 = a[0];
-        t2 = b[0];
-        r[0] = (t1 - t2 - c) & BN_MASK2;
-        if (t1 != t2)
-            c = (t1 < t2);
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[0];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[0] = t1;
+        c += (t1 > t2);
         t1 = a[1];
-        t2 = b[1];
-        r[1] = (t1 - t2 - c) & BN_MASK2;
-        if (t1 != t2)
-            c = (t1 < t2);
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[1];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[1] = t1;
+        c += (t1 > t2);
         t1 = a[2];
-        t2 = b[2];
-        r[2] = (t1 - t2 - c) & BN_MASK2;
-        if (t1 != t2)
-            c = (t1 < t2);
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[2];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[2] = t1;
+        c += (t1 > t2);
         t1 = a[3];
-        t2 = b[3];
-        r[3] = (t1 - t2 - c) & BN_MASK2;
-        if (t1 != t2)
-            c = (t1 < t2);
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[3];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[3] = t1;
+        c += (t1 > t2);
         a += 4;
         b += 4;
         r += 4;
@@ -408,10 +416,12 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 #endif
     while (n) {
         t1 = a[0];
-        t2 = b[0];
-        r[0] = (t1 - t2 - c) & BN_MASK2;
-        if (t1 != t2)
-            c = (t1 < t2);
+        t2 = (t1 - c) & BN_MASK2;
+        c  = (t2 > t1);
+        t1 = b[0];
+        t1 = (t2 - t1) & BN_MASK2;
+        r[0] = t1;
+        c += (t1 > t2);
         a++;
         b++;
         r++;
@@ -449,7 +459,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         t += c0;                /* no carry */  \
         c0 = (BN_ULONG)Lw(t);                   \
         hi = (BN_ULONG)Hw(t);                   \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         } while(0)
 
 #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
@@ -458,11 +468,11 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         BN_ULLONG tt = t+c0;    /* no carry */  \
         c0 = (BN_ULONG)Lw(tt);                  \
         hi = (BN_ULONG)Hw(tt);                  \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         t += c0;                /* no carry */  \
         c0 = (BN_ULONG)Lw(t);                   \
         hi = (BN_ULONG)Hw(t);                   \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         } while(0)
 
 #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
@@ -471,7 +481,7 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         t += c0;                /* no carry */  \
         c0 = (BN_ULONG)Lw(t);                   \
         hi = (BN_ULONG)Hw(t);                   \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2) \
@@ -486,26 +496,26 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         BN_ULONG ta = (a), tb = (b);            \
         BN_ULONG lo, hi;                        \
         BN_UMULT_LOHI(lo,hi,ta,tb);             \
-        c0 += lo; hi += (c0<lo)?1:0;            \
-        c1 += hi; c2 += (c1<hi)?1:0;            \
+        c0 += lo; hi += (c0<lo);                \
+        c1 += hi; c2 += (c1<hi);                \
         } while(0)
 
 #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
         BN_ULONG ta = (a), tb = (b);            \
         BN_ULONG lo, hi, tt;                    \
         BN_UMULT_LOHI(lo,hi,ta,tb);             \
-        c0 += lo; tt = hi+((c0<lo)?1:0);        \
-        c1 += tt; c2 += (c1<tt)?1:0;            \
-        c0 += lo; hi += (c0<lo)?1:0;            \
-        c1 += hi; c2 += (c1<hi)?1:0;            \
+        c0 += lo; tt = hi + (c0<lo);            \
+        c1 += tt; c2 += (c1<tt);                \
+        c0 += lo; hi += (c0<lo);                \
+        c1 += hi; c2 += (c1<hi);                \
         } while(0)
 
 #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
         BN_ULONG ta = (a)[i];                   \
         BN_ULONG lo, hi;                        \
         BN_UMULT_LOHI(lo,hi,ta,ta);             \
-        c0 += lo; hi += (c0<lo)?1:0;            \
-        c1 += hi; c2 += (c1<hi)?1:0;            \
+        c0 += lo; hi += (c0<lo);                \
+        c1 += hi; c2 += (c1<hi);                \
         } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2)    \
@@ -520,26 +530,26 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         BN_ULONG ta = (a), tb = (b);            \
         BN_ULONG lo = ta * tb;                  \
         BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
-        c0 += lo; hi += (c0<lo)?1:0;            \
-        c1 += hi; c2 += (c1<hi)?1:0;            \
+        c0 += lo; hi += (c0<lo);                \
+        c1 += hi; c2 += (c1<hi);                \
         } while(0)
 
 #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
         BN_ULONG ta = (a), tb = (b), tt;        \
         BN_ULONG lo = ta * tb;                  \
         BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
-        c0 += lo; tt = hi + ((c0<lo)?1:0);      \
-        c1 += tt; c2 += (c1<tt)?1:0;            \
-        c0 += lo; hi += (c0<lo)?1:0;            \
-        c1 += hi; c2 += (c1<hi)?1:0;            \
+        c0 += lo; tt = hi + (c0<lo);            \
+        c1 += tt; c2 += (c1<tt);                \
+        c0 += lo; hi += (c0<lo);                \
+        c1 += hi; c2 += (c1<hi);                \
         } while(0)
 
 #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
         BN_ULONG ta = (a)[i];                   \
         BN_ULONG lo = ta * ta;                  \
         BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
-        c0 += lo; hi += (c0<lo)?1:0;            \
-        c1 += hi; c2 += (c1<hi)?1:0;            \
+        c0 += lo; hi += (c0<lo);                \
+        c1 += hi; c2 += (c1<hi);                \
         } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2)      \
@@ -554,8 +564,8 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
         BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
         mul64(lo,hi,bl,bh);                     \
-        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c0 = (c0+lo)&BN_MASK2; hi += (c0<lo);   \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         } while(0)
 
 #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
@@ -564,17 +574,17 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
         BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
         mul64(lo,hi,bl,bh);                     \
         tt = hi;                                \
-        c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
-        c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
-        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c0 = (c0+lo)&BN_MASK2; tt += (c0<lo);   \
+        c1 = (c1+tt)&BN_MASK2; c2 += (c1<tt);   \
+        c0 = (c0+lo)&BN_MASK2; hi += (c0<lo);   \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         } while(0)
 
 #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
         BN_ULONG lo, hi;                        \
         sqr64(lo,hi,(a)[i]);                    \
-        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        c0 = (c0+lo)&BN_MASK2; hi += (c0<lo);   \
+        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
         } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2) \
diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index 76fc7ebcf..93c3be360 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -191,7 +191,8 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b,
             n->top = (int)(rtop & ~mask) | (ntop & mask);
             n->flags |= (BN_FLG_FIXED_TOP & ~mask);
         }
-        ret = BN_mod_mul_montgomery(n, n, r, b->m_ctx, ctx);
+        ret = bn_mul_mont_fixed_top(n, n, r, b->m_ctx, ctx);
+        bn_correct_top_consttime(n);
     } else {
         ret = BN_mod_mul(n, n, r, b->mod, ctx);
     }
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index 86d4956c8..6fc60f81d 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -985,6 +985,28 @@ BIGNUM *bn_wexpand(BIGNUM *a, int words)
     return (words <= a->dmax) ? a : bn_expand2(a, words);
 }
 
+void bn_correct_top_consttime(BIGNUM *a)
+{
+    int j, atop;
+    BN_ULONG limb;
+    unsigned int mask;
+
+    for (j = 0, atop = 0; j < a->dmax; j++) {
+        limb = a->d[j];
+        limb |= 0 - limb;
+        limb >>= BN_BITS2 - 1;
+        limb = 0 - limb;
+        mask = (unsigned int)limb;
+        mask &= constant_time_msb(j - a->top);
+        atop = constant_time_select_int(mask, j + 1, atop);
+    }
+
+    mask = constant_time_eq_int(atop, 0);
+    a->top = atop;
+    a->neg = constant_time_select_int(mask, 0, a->neg);
+    a->flags &= ~BN_FLG_FIXED_TOP;
+}
+
 void bn_correct_top(BIGNUM *a)
 {
     BN_ULONG *ftl;
diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
index 8ad69ccd3..80b89e632 100644
--- a/crypto/bn/bn_local.h
+++ b/crypto/bn/bn_local.h
@@ -495,10 +495,10 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         ret =  (r);                     \
         BN_UMULT_LOHI(low,high,w,tmp);  \
         ret += (c);                     \
-        (c) =  (ret<(c))?1:0;           \
+        (c) =  (ret<(c));               \
         (c) += high;                    \
         ret += low;                     \
-        (c) += (ret<low)?1:0;           \
+        (c) += (ret<low);               \
         (r) =  ret;                     \
         }
 
@@ -507,7 +507,7 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         BN_UMULT_LOHI(low,high,w,ta);   \
         ret =  low + (c);               \
         (c) =  high;                    \
-        (c) += (ret<low)?1:0;           \
+        (c) += (ret<low);               \
         (r) =  ret;                     \
         }
 
@@ -523,10 +523,10 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         high=  BN_UMULT_HIGH(w,tmp);    \
         ret += (c);                     \
         low =  (w) * tmp;               \
-        (c) =  (ret<(c))?1:0;           \
+        (c) =  (ret<(c));               \
         (c) += high;                    \
         ret += low;                     \
-        (c) += (ret<low)?1:0;           \
+        (c) += (ret<low);               \
         (r) =  ret;                     \
         }
 
@@ -536,7 +536,7 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         high=  BN_UMULT_HIGH(w,ta);     \
         ret =  low + (c);               \
         (c) =  high;                    \
-        (c) += (ret<low)?1:0;           \
+        (c) += (ret<low);               \
         (r) =  ret;                     \
         }
 
@@ -569,10 +569,10 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         lt=(bl)*(lt); \
         m1=(bl)*(ht); \
         ht =(bh)*(ht); \
-        m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
+        m=(m+m1)&BN_MASK2; ht += L2HBITS((BN_ULONG)(m < m1)); \
         ht+=HBITS(m); \
         m1=L2HBITS(m); \
-        lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
+        lt=(lt+m1)&BN_MASK2; ht += (lt < m1); \
         (l)=lt; \
         (h)=ht; \
         }
@@ -589,7 +589,7 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         h*=h; \
         h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
         m =(m&BN_MASK2l)<<(BN_BITS4+1); \
-        l=(l+m)&BN_MASK2; if (l < m) h++; \
+        l=(l+m)&BN_MASK2; h += (l < m); \
         (lo)=l; \
         (ho)=h; \
         }
@@ -603,9 +603,9 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         mul64(l,h,(bl),(bh)); \
  \
         /* non-multiply part */ \
-        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
+        l=(l+(c))&BN_MASK2; h += (l < (c)); \
         (c)=(r); \
-        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
+        l=(l+(c))&BN_MASK2; h += (l < (c)); \
         (c)=h&BN_MASK2; \
         (r)=l; \
         }
@@ -619,7 +619,7 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
         mul64(l,h,(bl),(bh)); \
  \
         /* non-multiply part */ \
-        l+=(c); if ((l&BN_MASK2) < (c)) h++; \
+        l+=(c); h += ((l&BN_MASK2) < (c)); \
         (c)=h&BN_MASK2; \
         (r)=l&BN_MASK2; \
         }
@@ -649,7 +649,7 @@ BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                            int cl, int dl);
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                 const BN_ULONG *np, const BN_ULONG *n0, int num);
-
+void bn_correct_top_consttime(BIGNUM *a);
 BIGNUM *int_bn_mod_inverse(BIGNUM *in,
                            const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
                            int *noinv);
diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
index 8078bb706..5a2af0211 100644
--- a/crypto/rsa/rsa_ossl.c
+++ b/crypto/rsa/rsa_ossl.c
@@ -226,6 +226,7 @@ static int rsa_blinding_invert(BN_BLINDING *b, BIGNUM *f, BIGNUM *unblind,
      * will only read the modulus from BN_BLINDING. In both cases it's safe
      * to access the blinding without a lock.
      */
+    BN_set_flags(f, BN_FLG_CONSTTIME);
     return BN_BLINDING_invert_ex(f, unblind, b, ctx);
 }
 
@@ -412,6 +413,11 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
         goto err;
     }
 
+    if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
+        if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
+                                    rsa->n, ctx))
+            goto err;
+
     if (!(rsa->flags & RSA_FLAG_NO_BLINDING)) {
         blinding = rsa_get_blinding(rsa, &local_blinding, ctx);
         if (blinding == NULL) {
@@ -449,13 +455,6 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
             goto err;
         }
         BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
-
-        if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-            if (!BN_MONT_CTX_set_locked(&rsa->_method_mod_n, rsa->lock,
-                                        rsa->n, ctx)) {
-                BN_free(d);
-                goto err;
-            }
         if (!rsa->meth->bn_mod_exp(ret, f, d, rsa->n, ctx,
                                    rsa->_method_mod_n)) {
             BN_free(d);
@@ -470,6 +469,8 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
             goto err;
 
     j = BN_bn2binpad(ret, buf, num);
+    if (j < 0)
+        goto err;
 
     switch (padding) {
     case RSA_PKCS1_PADDING:

From a767fd93d1daca1b47be29e84b06554959403349 Mon Sep 17 00:00:00 2001
From: K1 <dongbeiouba@gmail.com>
Date: Mon, 6 Jan 2025 12:33:22 +0800
Subject: [PATCH 3/4] Add a CHANGES entry for CVE-2022-4304.

---
 CHANGES    | 2 ++
 CHANGES.en | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/CHANGES b/CHANGES
index ee12fec9a..5fb12a441 100644
--- a/CHANGES
+++ b/CHANGES
@@ -7,6 +7,8 @@
 
  Changes between 8.3.3 and 8.3.4 [xxxx年xx月xx日]
 
+  *) 重新修复CVE-2022-4304
+
   *) 修复CVE-2024-5535
 
   *) 修复CVE-2024-4741
diff --git a/CHANGES.en b/CHANGES.en
index a48e8c0fe..61d0a1838 100644
--- a/CHANGES.en
+++ b/CHANGES.en
@@ -7,6 +7,8 @@
 
  Changes between 8.3.3 and 8.3.4 [xx XXX xxxx]
 
+  *) Alternative fix for CVE-2022-4304
+
   *) Fix CVE-2024-5535
 
   *) Fix CVE-2024-4741

From 068e044968ac4adb50c788346b17a69fcb8331ad Mon Sep 17 00:00:00 2001
From: K1 <dongbeiouba@gmail.com>
Date: Mon, 6 Jan 2025 14:36:39 +0800
Subject: [PATCH 4/4] Update copyright year to 2025.

---
 crypto/asn1/charmap.h     | 2 +-
 crypto/bn/bn_prime.h      | 2 +-
 crypto/conf/conf_def.h    | 2 +-
 crypto/objects/obj_dat.h  | 2 +-
 crypto/objects/obj_xref.h | 2 +-
 include/openssl/obj_mac.h | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/crypto/asn1/charmap.h b/crypto/asn1/charmap.h
index 7f4343179..af481d30f 100644
--- a/crypto/asn1/charmap.h
+++ b/crypto/asn1/charmap.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/asn1/charmap.pl
  *
- * Copyright 2000-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2025 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/bn/bn_prime.h b/crypto/bn/bn_prime.h
index 1953d519f..6f79d3801 100644
--- a/crypto/bn/bn_prime.h
+++ b/crypto/bn/bn_prime.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/bn/bn_prime.pl
  *
- * Copyright 1998-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2025 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
diff --git a/crypto/conf/conf_def.h b/crypto/conf/conf_def.h
index 8a869dc8b..32ccd99ec 100644
--- a/crypto/conf/conf_def.h
+++ b/crypto/conf/conf_def.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/conf/keysets.pl
  *
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
diff --git a/crypto/objects/obj_dat.h b/crypto/objects/obj_dat.h
index 4606c5705..c8970bc5c 100644
--- a/crypto/objects/obj_dat.h
+++ b/crypto/objects/obj_dat.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/objects/obj_dat.pl
  *
- * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
diff --git a/crypto/objects/obj_xref.h b/crypto/objects/obj_xref.h
index c5532c759..668fff0d4 100644
--- a/crypto/objects/obj_xref.h
+++ b/crypto/objects/obj_xref.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by objxref.pl
  *
- * Copyright 1998-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1998-2025 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
diff --git a/include/openssl/obj_mac.h b/include/openssl/obj_mac.h
index bd98d4dec..7eb9f0d3b 100644
--- a/include/openssl/obj_mac.h
+++ b/include/openssl/obj_mac.h
@@ -2,7 +2,7 @@
  * WARNING: do not edit!
  * Generated by crypto/objects/objects.pl
  *
- * Copyright 2000-2024 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2000-2025 The OpenSSL Project Authors. All Rights Reserved.
  * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at