Skip to content

Commit 479b744

Browse files
committed
variety of comments/nits
1 parent 14d5b6d commit 479b744

File tree

13 files changed

+206
-182
lines changed

13 files changed

+206
-182
lines changed

.github/workflows/test.yml

-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ jobs:
5252
- uses: EmbarkStudios/cargo-deny-action@v1
5353

5454

55-
# TODO: Temp 'fix' for Rust 1.80/1.81 problem involving 'time'; to be unwound...
5655
cargo_outdated:
5756
runs-on: ubuntu-latest
5857
steps:

CHANGELOG.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## 0.4.4 (2024-10-XX)
8+
## 0.4.4 (2024-10-29)
99

1010
- Significant shrink of required stack size
11-
- Internal-only refactoring and polishing
11+
- Internal-only refactoring, clean-up and polishing
1212

1313
## 0.4.3 (2024-10-16)
1414

benches/README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ Note that constant-time restrictions on the implementation do impact performance
33

44
Additional performance optimizations are on the roadmap. Near-obvious uplift can be
55
had with more careful modular multiplication & addition using fewer reductions. Also,
6-
'u16' arithmetic has an x86 performance penalty.
6+
'u16' arithmetic has an x86 performance penalty. The `cap_a_hat` pre-compute can be
7+
put into both PublicKey and PrivateKey structs, but currently causes stack overflows on
8+
Windows with unoptimized dev builds...this will be investigated further.
79

810
~~~
911
October 15, 2024

dudect/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ impl CryptoRng for TestRng {}
2626
#[repr(align(8))]
2727
pub struct AlignedBytes<const BYTE_LEN: usize>(pub(crate) [u8; BYTE_LEN]);
2828

29-
29+
#[allow(deprecated)] // calling dudect fn below in inner loop
3030
fn keygen_and_sign(runner: &mut CtRunner, mut _rng: &mut BenchRng) {
3131
const ITERATIONS_INNER: usize = 5;
3232
const ITERATIONS_OUTER: usize = 2_usize.pow(20); // 2**20 = 1_048_576

src/encodings.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ use crate::{D, Q};
1313
///
1414
/// This is only used in `ml_dsa::key_gen()` and does not involve untrusted input.
1515
///
16-
/// **Input**: `ρ ∈ {0,1}^256`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}-1]`. <br>
16+
/// **Input**: `ρ ∈ B^{32}`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}-1]`. <br>
1717
/// **Output**: Public key `pk ∈ B^{32+32·k·(bitlen(q−1)−d)}`.
1818
pub(crate) fn pk_encode<const K: usize, const PK_LEN: usize>(
1919
rho: &[u8; 32], t1: &[R; K],
2020
) -> [u8; PK_LEN] {
21-
let blqd = bit_length(Q - 1) - D as usize;
22-
debug_assert!(t1.iter().all(|t| is_in_range(t, 0, (1 << blqd) - 1)), "Alg 22: t1 out of range");
23-
debug_assert_eq!(PK_LEN, 32 + 32 * K * blqd, "Alg 22: bad pk/config size");
21+
const BLQD: usize = bit_length(Q - 1) - D as usize;
22+
debug_assert!(t1.iter().all(|t| is_in_range(t, 0, (1 << BLQD) - 1)), "Alg 22: t1 out of range");
23+
debug_assert_eq!(PK_LEN, 32 + 32 * K * BLQD, "Alg 22: bad pk/config size");
2424
let mut pk = [0u8; PK_LEN];
2525

2626
// 1: pk ← rho
@@ -30,10 +30,10 @@ pub(crate) fn pk_encode<const K: usize, const PK_LEN: usize>(
3030
// 3: pk ← pk || SimpleBitPack(t1[i], 2^{bitlen(q−1)−d}-1)
3131
// 4: end for
3232
pk[32..]
33-
.chunks_mut(32 * blqd)
33+
.chunks_mut(32 * BLQD)
3434
.enumerate()
3535
.take(K) // not strictly needed
36-
.for_each(|(i, chunk)| simple_bit_pack(&t1[i], (1 << blqd) - 1, chunk));
36+
.for_each(|(i, chunk)| simple_bit_pack(&t1[i], (1 << BLQD) - 1, chunk));
3737

3838
// 5: return pk
3939
pk
@@ -47,7 +47,7 @@ pub(crate) fn pk_encode<const K: usize, const PK_LEN: usize>(
4747
/// `simple_bit_unpack()` will detect malformed input -- an overly conservative (?) route for now.
4848
///
4949
/// **Input**: Public key `pk ∈ B^{32+32·k·(bitlen(q−1)−d)}`. <br>
50-
/// **Output**: `ρ ∈ {0,1}^256`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}−1]`).
50+
/// **Output**: `ρ ∈ B^{32}`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}−1]`.
5151
///
5252
/// # Errors
5353
/// Returns an error when the internal `simple_bit_unpack()` invocation finds an element of
@@ -85,7 +85,7 @@ pub(crate) fn pk_decode<const K: usize, const PK_LEN: usize>(
8585
///
8686
/// This is only used in `ml_dsa::key_gen()` and does not involve untrusted input.
8787
///
88-
/// **Input**: `ρ ∈ {0,1}^256`, `K ∈ {0,1}^256`, `tr ∈ {0,1}^512`,
88+
/// **Input**: `ρ ∈ B^{32}`, `K ∈ B^{32}`, `tr ∈ B^{64}`,
8989
/// `s_1 ∈ R^l` with coefficients in `[−η, η]`,
9090
/// `s_2 ∈ R^k` with coefficients in `[−η, η]`,
9191
/// `t_0 ∈ R^k` with coefficients in `[−2^{d-1}+1, 2^{d-1}]`.
@@ -159,7 +159,7 @@ pub(crate) fn sk_encode<const K: usize, const L: usize, const SK_LEN: usize>(
159159
///
160160
/// **Input**: Private key, `sk ∈ B^{32+32+64+32·((ℓ+k)·bitlen(2η)+d·k)}`
161161
/// Security parameter `η` (eta) must be either 2 or 4.<br>
162-
/// **Output**: `ρ ∈ {0,1}^256`, `K ∈ {0,1}^256`, `tr ∈ {0,1}^512`,
162+
/// **Output**: `ρ ∈ B^{32}`, `K ∈ B^{32}`, `tr ∈ B^{64}`,
163163
/// `s_1 ∈ R^ℓ`, `s_2 ∈ R^k`, `t_0 ∈ R^k` with coefficients in `[−2^{d−1}+1, 2^{d−1}]`.
164164
///
165165
/// # Errors
@@ -168,13 +168,13 @@ pub(crate) fn sk_encode<const K: usize, const L: usize, const SK_LEN: usize>(
168168
pub(crate) fn sk_decode<const K: usize, const L: usize, const SK_LEN: usize>(
169169
eta: i32, sk: &[u8; SK_LEN],
170170
) -> Result<(&[u8; 32], &[u8; 32], &[u8; 64], [R; L], [R; K], [R; K]), &'static str> {
171+
const TOP: i32 = 1 << (D - 1);
171172
debug_assert!((eta == 2) || (eta == 4), "Alg 25: incorrect eta");
172173
debug_assert_eq!(
173174
SK_LEN,
174175
128 + 32 * ((K + L) * bit_length(2 * eta) + D as usize * K),
175176
"Alg 25: bad sk/config size"
176177
);
177-
let top = 1 << (D - 1);
178178
let (mut s_1, mut s_2, mut t_0) = ([R0; L], [R0; K], [R0; K]);
179179

180180
// 1: (rho, 𝐾, tr, 𝑦0 , … , 𝑦ℓ−1 , 𝑧0 , … , 𝑧𝑘−1 , 𝑤0 , … , 𝑤𝑘−1 ) ∈
@@ -211,7 +211,7 @@ pub(crate) fn sk_decode<const K: usize, const L: usize, const SK_LEN: usize>(
211211
for i in 0..K {
212212
//
213213
// 9: t0[i] ← BitUnpack(wi, 2^{d−1} − 1, 2^{d−1})     ▷ This is always in the correct range
214-
t_0[i] = bit_unpack(&sk[start + i * step..start + (i + 1) * step], top - 1, top)?;
214+
t_0[i] = bit_unpack(&sk[start + i * step..start + (i + 1) * step], TOP - 1, TOP)?;
215215

216216
// 10: end for
217217
}
@@ -231,7 +231,7 @@ pub(crate) fn sk_decode<const K: usize, const L: usize, const SK_LEN: usize>(
231231
/// The `CTEST` generic is only passed through to the `hint_bit_pack()` leaf function
232232
/// such that this logic becomes constant-time.
233233
///
234-
/// **Input**: `c_tilde ∈ {0,1}^2λ` (bits),
234+
/// **Input**: `c_tilde ∈ B^{λ/4}`,
235235
/// `z ∈ R^ℓ` with coefficients in `[−1*γ_1 + 1, γ_1]`,
236236
/// `h ∈ R^k_2`. <br>
237237
/// **Output**: Signature, `σ ∈ B^{λ/4+ℓ·32·(1+bitlen(γ_1−1))+ω+k}`

src/hashing.rs

+12-13
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use sha2::{Digest, Sha256, Sha512};
77
use sha3::digest::{ExtendableOutput, Update, XofReader};
88
use sha3::{Shake128, Shake256};
99

10-
1110
/// # Function H(v,d) of section 3.7 item 1 at bottom of page 14.
1211
/// Takes a reference to a list of byte-slice references and runs them through Shake256.
1312
/// Returns a xof reader for extracting extendable output.
@@ -86,7 +85,7 @@ pub(crate) fn sample_in_ball<const CTEST: bool>(tau: i32, rho: &[u8]) -> R {
8685
// 13: end for
8786
}
8887

89-
// slightly redundant...
88+
// slightly redundant, but fuzz target
9089
debug_assert!(
9190
c.0.iter().map(|&e| usize::from(e != 0)).sum::<usize>() == tau,
9291
"Alg 29: bad hamming weight (a)"
@@ -124,10 +123,10 @@ pub(crate) fn rej_ntt_poly<const CTEST: bool>(rhos: &[&[u8]]) -> T {
124123
while j < 256 {
125124
//
126125
// 5: (ctx, 𝑠) ← G.Squeeze(ctx, 3)
127-
// 6: a_hat[j] ← CoefFromThreeBytes(H128(ρ)[[c]], H128(ρ)[[c + 1]], H128(ρ)[[c + 2]])
128-
let mut h128pc = [0u8; 3];
129-
xof.read(&mut h128pc); // implicit c += 3
130-
let a_hat_j = coeff_from_three_bytes::<CTEST>(h128pc); // gets a result
126+
// 6: 𝑎[𝑗] ← CoeffFromThreeBytes(𝑠[0], 𝑠[1], 𝑠[2])
127+
let mut h5 = [0u8; 3];
128+
xof.read(&mut h5); // implicit c += 3
129+
let a_hat_j = coeff_from_three_bytes::<CTEST>(h5); // gets a result
131130

132131
// 7: if a_hat[j] != ⊥ then
133132
if let Ok(res) = a_hat_j {
@@ -154,7 +153,7 @@ pub(crate) fn rej_ntt_poly<const CTEST: bool>(rhos: &[&[u8]]) -> T {
154153
/// The `CTEST` generic is only passed through to the `coef_from_half_byte()` leaf function such
155154
/// that this logic becomes constant-time.
156155
///
157-
/// **Input**: A seed `ρ ∈B^66`. <br>
156+
/// **Input**: A seed `ρ ∈ B^{66}`. <br>
158157
/// **Output**: A polynomial `a ∈ Rq`.
159158
pub(crate) fn rej_bounded_poly<const CTEST: bool>(eta: i32, rhos: &[&[u8]]) -> R {
160159
debug_assert_eq!(rhos.iter().map(|&i| i.len()).sum::<usize>(), 528 / 8, "Alg 31: bad rho size");
@@ -221,7 +220,7 @@ pub(crate) fn rej_bounded_poly<const CTEST: bool>(eta: i32, rhos: &[&[u8]]) -> R
221220
/// such that this logic becomes constant-time.
222221
///
223222
/// **Input**: `ρ ∈ B^{32}`. <br>
224-
/// **Output**: Matrix `cap_a_hat ∈ (𝑇𝑞)^{𝑘×ℓ}`
223+
/// **Output**: Matrix `cap_a_hat ∈ 𝑇_𝑞^{𝑘×ℓ}`
225224
#[allow(clippy::cast_possible_truncation)] // s and r as u8
226225
pub(crate) fn expand_a<const CTEST: bool, const K: usize, const L: usize>(
227226
rho: &[u8; 32],
@@ -247,7 +246,7 @@ pub(crate) fn expand_a<const CTEST: bool, const K: usize, const L: usize>(
247246
/// The `CTEST` generic is only passed through to the `rej_bounded_poly()` leaf function
248247
/// such that this logic becomes constant-time.
249248
///
250-
/// **Input**: `ρ ∈ B^64` <br>
249+
/// **Input**: `ρ ∈ B^{64}` <br>
251250
/// **Output**: Vectors `s1`, `s2` of polynomials in `R_q`.
252251
#[allow(clippy::cast_possible_truncation)] // r and r+L
253252
pub(crate) fn expand_s<const CTEST: bool, const K: usize, const L: usize>(
@@ -267,8 +266,8 @@ pub(crate) fn expand_s<const CTEST: bool, const K: usize, const L: usize>(
267266
core::array::from_fn(|r| rej_bounded_poly::<CTEST>(eta, &[rho, &[(r + L) as u8], &[0]]));
268267

269268
// 7: return (s_1 , s_2)
270-
debug_assert!(s1.iter().all(|r| is_in_range(r, eta, eta)), "Alg 27: s1 out of range");
271-
debug_assert!(s2.iter().all(|r| is_in_range(r, eta, eta)), "Alg 27: s2 out of range");
269+
debug_assert!(s1.iter().all(|r| is_in_range(r, eta, eta)), "Alg 33: s1 out of range");
270+
debug_assert!(s2.iter().all(|r| is_in_range(r, eta, eta)), "Alg 33: s2 out of range");
272271
(s1, s2)
273272
}
274273

@@ -277,8 +276,8 @@ pub(crate) fn expand_s<const CTEST: bool, const K: usize, const L: usize>(
277276
/// Samples a vector `s ∈ R^ℓ_q` such that each polynomial `s_j` has coefficients
278277
/// between `−γ_1 + 1` and `γ_1`. This function is not exposed to untrusted input.
279278
///
280-
/// **Input**: A bit string `ρ ∈ {0,1}^512` and a non-negative integer `µ`. <br>
281-
/// **Output**: Vector `y ∈ R^ℓ_q`.
279+
/// **Input**: A byte string `ρ ∈ B^{64}` and a non-negative integer `µ`. <br>
280+
/// **Output**: Vector `y ∈ R^ℓ`.
282281
pub(crate) fn expand_mask<const L: usize>(gamma1: i32, rho: &[u8; 64], mu: u16) -> [R; L] {
283282
let mut y = [R0; L];
284283
let mut v = [0u8; 32 * 20]; // leaving a few bytes on the table

src/helpers.rs

+7-8
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::{Q, ZETA};
44
// Some arith routines leverage dilithium https://github.com/PQClean/PQClean/tree/master/crypto_sign
55

66

7-
/// Algorithm 43 `BitRev8()` is not implemented; zetas are pulled from pre-computed table
7+
/// # Algorithm 43 `BitRev8()` is not implemented; zetas are pulled from pre-computed table
88
/// `ZETA_TABLE_MONT`; see below (near end)
99
1010
/// # Macro ensure!()
@@ -44,8 +44,7 @@ pub(crate) const fn partial_reduce64(a: i64) -> i32 {
4444
}
4545

4646

47-
// TODO: need to experiment a little with `mul_red(32, 32)`
48-
#[allow(dead_code)]
47+
#[allow(dead_code)] // I may come back to this and experiment more
4948
#[allow(clippy::cast_possible_truncation)]
5049
pub(crate) const fn partial_reduce64b(a: i64) -> i32 {
5150
const MM: i64 = ((1 << 64) / (Q as i128)) as i64;
@@ -86,9 +85,9 @@ pub(crate) const fn bit_length(x: i32) -> usize { x.ilog2() as usize + 1 }
8685

8786

8887
/// Mod +/- see definition on page 6.
89-
/// If α is a positive integer and m ∈ Z or m ∈ `Z_α` , then m mod± α denotes the unique
90-
/// element m′ ∈ Z in the range −α/2 < m′ ≤ α/2 such that m and m′ are congruent
91-
/// modulo α. 'ready to optimize'
88+
/// If `α` is a positive integer and `m ∈ Z` or `m ∈ Z_α`, then `m mod± α` denotes the unique
89+
/// element `m′ ∈ Z` in the range `−α/2 < m′ ≤ α/2` such that `m` and `m′` are congruent
90+
/// modulo `α`. 'ready to optimize'
9291
pub(crate) fn center_mod(m: i32) -> i32 {
9392
debug_assert!(m.abs() < 2_143_289_344, "center_mod input"); // for clarity; caught in full_reduce32
9493
let t = full_reduce32(m);
@@ -120,7 +119,7 @@ pub(crate) fn mat_vec_mul<const K: usize, const L: usize>(
120119

121120
// Note: Algorithm 44 has been dissolved into its place(s) of use
122121

123-
/// Algorithm 46: `AddVectorNTT(v_hat, w_hat)` on page 45.
122+
/// # Algorithm 46: `AddVectorNTT(v_hat, w_hat)` on page 45.
124123
/// Computes the sum `v_hat + w_hat` of two vectors `v_hat`, `w_hat` over `𝑇_𝑞`.
125124
///
126125
/// **Input**: `ℓ ∈ ℕ, v_hat ∈ 𝑇_𝑞^ℓ , w_hat ∈ 𝑇_𝑞^ℓ`. <br>
@@ -151,7 +150,7 @@ pub(crate) fn infinity_norm<const ROW: usize>(w: &[R; ROW]) -> i32 {
151150
}
152151

153152

154-
/// Algorithm 49: MontgomeryReduce(𝑎) on page 50.
153+
/// # Algorithm 49: MontgomeryReduce(𝑎) on page 50.
155154
/// Computes `𝑎 ⋅ 2^{−32} mod 𝑞`.
156155
///
157156
/// **Input**: Integer `𝑎` with `−2^{31}·𝑞 ≤ 𝑎 ≤ 2^{31}·𝑞`.

src/high_low.rs

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ pub(crate) fn make_hint(gamma2: i32, z: Zq, r: Zq) -> bool {
155155
pub(crate) fn use_hint(gamma2: i32, h: Zq, r: Zq) -> Zq {
156156
//
157157
// 1: m ← (q− 1)/(2*γ_2)
158+
// dissolved into steps 3 and 4 below
158159

159160
// 2: (r1, r0) ← Decompose(r)
160161
let (r1, r0) = decompose(gamma2, r);

0 commit comments

Comments
 (0)