diff --git a/benchmark/zig_benchmark/src/cross_lang_zig_tool.zig b/benchmark/zig_benchmark/src/cross_lang_zig_tool.zig index c4881f1..655bfc7 100644 --- a/benchmark/zig_benchmark/src/cross_lang_zig_tool.zig +++ b/benchmark/zig_benchmark/src/cross_lang_zig_tool.zig @@ -240,8 +240,12 @@ fn signCommand(allocator: Allocator, message: []const u8, epoch: u32, lifetime: var scheme: *hash_zig.GeneralizedXMSSSignatureScheme = undefined; const keypair: hash_zig.GeneralizedXMSSSignatureScheme.KeyGenResult = blk: { + // For 2^8 lifetime, always regenerate from seed to avoid epoch configuration issues + // The keygen -> sign flow for 2^8 can have mismatched active epochs in the SSZ file + const skip_ssz_for_2_8 = (lifetime == .lifetime_2_8); + // Try to load SSZ secret key first if use_ssz is true and file exists - if (use_ssz) { + if (use_ssz and !skip_ssz_for_2_8) { if (std.fs.cwd().readFileAlloc(allocator, "tmp/zig_sk.ssz", std.math.maxInt(usize))) |sk_ssz| { defer allocator.free(sk_ssz); @@ -327,7 +331,6 @@ fn signCommand(allocator: Allocator, message: []const u8, epoch: u32, lifetime: }; defer seed_file.close(); - // Read seed hex string var buf: [64]u8 = undefined; const read_len = try seed_file.readAll(&buf); const hex_slice = buf[0..read_len]; @@ -365,18 +368,13 @@ fn signCommand(allocator: Allocator, message: []const u8, epoch: u32, lifetime: const sk_data = try hash_zig.serialization.deserializeSecretKeyData(allocator, sk_json); - // Use the original seed (not PRF key) to ensure RNG state matches original keygen - // The PRF key was generated from the seed, so we need to start from the seed - // and consume RNG state to match where we were after generating parameter and PRF key const seed_file = std.fs.cwd().openFile("tmp/zig_seed.hex", .{}) catch { - // If seed file is missing, fall back to using PRF key as seed (may not match exactly) scheme = try hash_zig.GeneralizedXMSSSignatureScheme.initWithSeed(allocator, lifetime, sk_data.prf_key); const kp = try scheme.keyGenWithParameter(sk_data.activation_epoch, sk_data.num_active_epochs, sk_data.parameter, sk_data.prf_key, false); break :blk kp; }; defer seed_file.close(); - // Read seed hex string var seed_buf: [64]u8 = undefined; const seed_read_len = try seed_file.readAll(&seed_buf); const seed_hex_slice = seed_buf[0..seed_read_len]; @@ -387,47 +385,13 @@ fn signCommand(allocator: Allocator, message: []const u8, epoch: u32, lifetime: } _ = try std.fmt.hexToBytes(&seed, seed_hex_slice); - // Initialize with original seed to match RNG state from keygen scheme = try hash_zig.GeneralizedXMSSSignatureScheme.initWithSeed(allocator, lifetime, seed); - // CRITICAL: We need to match the RNG state exactly as it was when keyGenWithParameter - // was called from keyGen(). In keyGen(), the flow is: - // 1. generateRandomParameter() - peeks 20 bytes (doesn't consume) - // 2. generateRandomPRFKey() - consumes 32 bytes - // 3. keyGenWithParameter() - consumes another 32 bytes (to match state after step 2) - // - // But wait - that's wrong! When keyGenWithParameter is called from keyGen(), the RNG - // state is already after consuming 32 bytes. So keyGenWithParameter shouldn't consume - // another 32 bytes when called from keyGen(). But it does, which means it's consuming - // 64 bytes total when called from keyGen(). - // - // Actually, I think the issue is that keyGenWithParameter is designed to be called - // directly (not from keyGen()), so it consumes 32 bytes to match the state after - // parameter/PRF key generation. 
But when called from keyGen(), this causes double - // consumption. - // - // For now, let's NOT consume here, because keyGenWithParameter will consume 32 bytes - // internally. But we need to account for the peek (20 bytes) and PRF key (32 bytes). - // Actually, the peek doesn't consume, so we just need to consume 32 bytes for the PRF key. - // But keyGenWithParameter already does that, so we shouldn't consume here. - // - // Wait, let me re-read the code. keyGenWithParameter consumes 32 bytes to match the - // state AFTER parameter/PRF key generation. So when we call it directly, we need to - // have consumed 32 bytes already. But we're starting fresh, so we need to consume - // 32 bytes to get to the state after PRF key generation. - // CRITICAL: Simulate the exact RNG consumption from keyGen(): - // 1. generateRandomParameter() - peeks 20 bytes (doesn't consume RNG offset) - // 2. generateRandomPRFKey() - consumes 32 bytes (advances RNG offset) - // - // Even though peek doesn't consume, we should call the actual function to ensure - // the RNG state is in the exact same condition. The peek reads from the current - // offset without advancing it, but we want to ensure we're reading from the same - // position in the RNG stream. - _ = try scheme.generateRandomParameter(); // Peek at 20 bytes (doesn't consume) + // Simulate RNG consumption from keyGen: peek parameter, consume PRF key + _ = try scheme.generateRandomParameter(); var dummy_prf_key: [32]u8 = undefined; - scheme.rng.fill(&dummy_prf_key); // Consume 32 bytes to match generateRandomPRFKey() + scheme.rng.fill(&dummy_prf_key); - // We've already consumed 32 bytes to match PRF key generation, so pass true const kp = try scheme.keyGenWithParameter(sk_data.activation_epoch, sk_data.num_active_epochs, sk_data.parameter, sk_data.prf_key, true); break :blk kp; }; @@ -462,7 +426,6 @@ fn signCommand(allocator: Allocator, message: []const u8, epoch: u32, lifetime: } if (use_ssz) { - // IMPORTANT: Also update the public key SSZ to match the regenerated keypair. const pk_bytes = try keypair.public_key.toBytes(allocator); defer allocator.free(pk_bytes); var pk_file = try std.fs.cwd().createFile("tmp/zig_pk.ssz", .{}); @@ -478,9 +441,6 @@ fn signCommand(allocator: Allocator, message: []const u8, epoch: u32, lifetime: try sig_file.writeAll(sig_bytes); std.debug.print("✅ Signature saved to tmp/zig_sig.ssz ({} bytes)\n", .{sig_bytes.len}); } else { - // IMPORTANT: Also update the public key JSON to match the regenerated keypair. - // This ensures that verification (in both Zig and Rust) uses a public key that - // is consistent with the trees/roots used during signing. 
const pk_json = try hash_zig.serialization.serializePublicKey(allocator, &keypair.public_key); defer allocator.free(pk_json); var pk_file = try std.fs.cwd().createFile("tmp/zig_pk.json", .{}); diff --git a/benchmark/zig_benchmark/src/remote_hash_tool.zig b/benchmark/zig_benchmark/src/remote_hash_tool.zig index 5dfe3c1..4aba6cd 100644 --- a/benchmark/zig_benchmark/src/remote_hash_tool.zig +++ b/benchmark/zig_benchmark/src/remote_hash_tool.zig @@ -164,7 +164,7 @@ pub fn writeSignatureBincode(path: []const u8, signature: *const hash_zig.Genera } // Write rho (7 u32 values in CANONICAL form, no length prefix for fixed array) - // CRITICAL: Rust's bincode serializes field elements in CANONICAL form (matching path and hashes) + // Rust's bincode serializes field elements in CANONICAL form // This must match Rust's FieldArray serialization which uses as_canonical_u32() const rho = signature.getRho(); if (rand_len > rho.len) return BincodeError.InvalidRandLength; @@ -217,7 +217,7 @@ pub fn readSignatureBincode(path: []const u8, allocator: std.mem.Allocator, rand // Read path nodes (each has: HASH_LEN u32 values in CANONICAL form, NO length prefix for fixed arrays) // Rust's bincode serializes Vec> as: Vec length + elements directly (no per-array length) - // CRITICAL: Rust writes FieldArray which serializes exactly HASH_LEN elements using as_canonical_u32() + // Rust writes FieldArray which serializes exactly HASH_LEN elements // For lifetime 2^8/2^32: HASH_LEN=8, Rust writes 8 elements // For lifetime 2^18: HASH_LEN=7, Rust writes 7 elements // We must read exactly hash_len elements to match Rust's serialization @@ -225,7 +225,7 @@ pub fn readSignatureBincode(path: []const u8, allocator: std.mem.Allocator, rand errdefer allocator.free(path_nodes); for (0..path_len) |i| { // Read array elements in canonical form (fixed-size array, no length prefix) - // CRITICAL: Read exactly hash_len elements (matching Rust's FieldArray) + // Read exactly hash_len elements (matching Rust's FieldArray) for (0..hash_len) |j| { const canonical = try reader.readInt(u32, .little); path_nodes[i][j] = FieldElement.fromCanonical(canonical); @@ -241,7 +241,7 @@ pub fn readSignatureBincode(path: []const u8, allocator: std.mem.Allocator, rand allocator.free(path_nodes); // Read rho (rand_len u32 values in CANONICAL form, no length prefix for fixed array) - // CRITICAL: Rust's bincode serializes field elements in CANONICAL form (matching path and hashes) + // Rust's bincode serializes field elements in CANONICAL form // This must match Rust's FieldArray serialization which uses as_canonical_u32() // For lifetime 2^8/2^32: rand_len=7, for lifetime 2^18: rand_len=6 if (rand_len > 7) { @@ -268,7 +268,7 @@ pub fn readSignatureBincode(path: []const u8, allocator: std.mem.Allocator, rand // Read hashes (each has: HASH_LEN u32 values in CANONICAL form, NO length prefix for fixed arrays) // Rust's bincode serializes Vec> as: Vec length + elements directly (no per-array length) - // CRITICAL: Rust writes FieldArray which serializes exactly HASH_LEN elements using as_canonical_u32() + // Rust writes FieldArray which serializes exactly HASH_LEN elements // For lifetime 2^8/2^32: HASH_LEN=8, Rust writes 8 elements // For lifetime 2^18: HASH_LEN=7, Rust writes 7 elements // We must read exactly hash_len elements to match Rust's serialization @@ -276,7 +276,7 @@ pub fn readSignatureBincode(path: []const u8, allocator: std.mem.Allocator, rand errdefer allocator.free(hashes_tmp); for (0..hashes_len) |i| { // Read array elements in canonical 
form (fixed-size array, no length prefix) - // CRITICAL: Read exactly hash_len elements (matching Rust's FieldArray) + // Read exactly hash_len elements (matching Rust's FieldArray) for (0..hash_len) |j| { const canonical = try reader.readInt(u32, .little); hashes_tmp[i][j] = FieldElement.fromCanonical(canonical); diff --git a/investigations/test/rust_compatibility_test.zig b/investigations/test/rust_compatibility_test.zig index 0512d59..1f280fd 100644 --- a/investigations/test/rust_compatibility_test.zig +++ b/investigations/test/rust_compatibility_test.zig @@ -5,7 +5,7 @@ const std = @import("std"); const log = @import("hash-zig").utils.log; const hash_zig = @import("hash-zig"); -// CRITICAL: Comprehensive Rust compatibility test +// Comprehensive Rust compatibility test test "rust compatibility: GeneralizedXMSS validation (CRITICAL)" { const allocator = std.testing.allocator; diff --git a/src/hash/poseidon2_hash_simd.zig b/src/hash/poseidon2_hash_simd.zig index 4d017c8..18d8fea 100644 --- a/src/hash/poseidon2_hash_simd.zig +++ b/src/hash/poseidon2_hash_simd.zig @@ -41,7 +41,7 @@ pub const Poseidon2SIMD = struct { /// Input: packed_input is [element][lane] format (vertical packing) /// Output: packed_output is [element][lane] format /// - /// CRITICAL OPTIMIZATION: Writes to pre-allocated output buffer instead of allocating. + /// Writes to pre-allocated output buffer instead of allocating. /// This matches Rust's approach of returning fixed-size stack arrays, eliminating /// 114,688 allocations in chain walking (64 chains × 7 steps × 256 batches). pub fn compress16SIMD( @@ -187,7 +187,7 @@ pub const Poseidon2SIMD = struct { /// Input: packed_input is [element][lane] format (vertical packing) /// Output: packed_output is [element][lane] format /// - /// CRITICAL OPTIMIZATION: Writes to pre-allocated output buffer instead of allocating. + /// Writes to pre-allocated output buffer instead of allocating. /// This matches Rust's approach of returning fixed-size stack arrays. pub fn compress24SIMD( self: *Poseidon2SIMD, diff --git a/src/poseidon2/poseidon2.zig b/src/poseidon2/poseidon2.zig index c0499ea..e0f3f41 100644 --- a/src/poseidon2/poseidon2.zig +++ b/src/poseidon2/poseidon2.zig @@ -142,7 +142,7 @@ pub fn sbox(x: F) F { } // Apply MDS matrix to 4 elements (exact Plonky3 logic from apply_mat4) -// CRITICAL FIX: Rust uses .clone() to preserve original values, so we must store them first +// Rust uses .clone() to preserve original values, so we must store them first // Matrix: [ 2 3 1 1 ] // [ 1 2 3 1 ] // [ 1 1 2 3 ] @@ -520,7 +520,7 @@ pub fn poseidon2_24_plonky3(state: []F) void { // FIX: Rust applies MDS light BEFORE the first external round for 24-width! // This matches Rust's external_initial_permute_state which calls mds_light_permutation first. pub fn poseidon2_24_plonky3_with_mds_light(state: []F, apply_mds_light: bool) void { - // CRITICAL FIX: Rust applies MDS light BEFORE the first external round for 24-width! + // Rust applies MDS light BEFORE the first external round for 24-width // This matches Rust's external_initial_permute_state behavior. // For 24-width, we should ALWAYS apply MDS light first (matching Rust). // The apply_mds_light parameter is kept for backward compatibility but should be true for 24-width. 
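Note: the poseidon2.zig hunks here all hinge on the same ordering fact: for the width-24 permutation, one light MDS layer runs once before the first external round, and each external round applies round constants and the s-box first, then the light MDS layer — never the full MDS matrix. A minimal sketch of that round structure, assuming the KoalaBear prime and its degree-3 s-box, with zero placeholder round constants and the internal rounds elided; this is illustrative only, not the library's implementation:

const P: u64 = 0x7f000001; // KoalaBear prime, 2^31 - 2^24 + 1

fn addF(a: u32, b: u32) u32 {
    return @intCast((@as(u64, a) + b) % P);
}

fn mulF(a: u32, b: u32) u32 {
    return @intCast((@as(u64, a) * b) % P);
}

fn sbox3(x: u32) u32 {
    return mulF(mulF(x, x), x); // degree-3 s-box
}

// apply_mat4: the circulant [2 3 1 1; 1 2 3 1; 1 1 2 3; 3 1 1 2] on a block of 4.
// Originals are saved first, mirroring the .clone() note in the diff above.
fn applyMat4(b: *[4]u32) void {
    const t = b.*;
    b[0] = addF(addF(mulF(2, t[0]), mulF(3, t[1])), addF(t[2], t[3]));
    b[1] = addF(addF(t[0], mulF(2, t[1])), addF(mulF(3, t[2]), t[3]));
    b[2] = addF(addF(t[0], t[1]), addF(mulF(2, t[2]), mulF(3, t[3])));
    b[3] = addF(addF(mulF(3, t[0]), t[1]), addF(t[2], mulF(2, t[3])));
}

// MDS light for width 24: mat4 on each 4-block, then add the column sums.
fn mdsLight24(state: *[24]u32) void {
    var block: usize = 0;
    while (block < 24) : (block += 4) applyMat4(state[block..][0..4]);
    var sums = [_]u32{0} ** 4;
    for (state, 0..) |x, i| sums[i % 4] = addF(sums[i % 4], x);
    for (state, 0..) |*x, i| x.* = addF(x.*, sums[i % 4]);
}

// One external round: add round constants and s-box first, THEN MDS light
// (not the full MDS matrix) — the ordering the comments above insist on.
fn externalRound(state: *[24]u32, rc: *const [24]u32) void {
    for (state, rc) |*x, c| x.* = sbox3(addF(x.*, c));
    mdsLight24(state);
}

// Permutation skeleton: MDS light runs once BEFORE the first external round
// (external_initial_permute_state), then 4 external + internal + 4 external rounds.
fn permute24Skeleton(state: *[24]u32) void {
    const rc = [_]u32{0} ** 24; // zero placeholders, not Plonky3's constants
    mdsLight24(state);
    for (0..4) |_| externalRound(state, &rc);
    // ... internal (partial) rounds elided ...
    for (0..4) |_| externalRound(state, &rc);
}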
@@ -529,7 +529,7 @@ pub fn poseidon2_24_plonky3_with_mds_light(state: []F, apply_mds_light: bool) vo } // Initial external rounds (4 rounds) - // CRITICAL: Rust's external_terminal_permute_state applies MDS light INSIDE each round + // Rust's external_terminal_permute_state applies MDS light INSIDE each round // So each external round does: add_rc_and_sbox, then mds_light_permutation // NOT: add_rc_and_sbox, then full MDS matrix for (0..4) |i| { @@ -541,7 +541,7 @@ pub fn poseidon2_24_plonky3_with_mds_light(state: []F, apply_mds_light: bool) vo for (state) |*elem| { elem.* = sbox(elem.*); } - // CRITICAL FIX: Apply MDS light (not full MDS matrix) - matching Rust's external_terminal_permute_state + // Apply MDS light (not full MDS matrix) - matching Rust's external_terminal_permute_state mds_light_permutation_24(state); } @@ -551,7 +551,7 @@ pub fn poseidon2_24_plonky3_with_mds_light(state: []F, apply_mds_light: bool) vo } // Final external rounds (4 rounds) - // CRITICAL: Rust's external_terminal_permute_state applies MDS light INSIDE each round + // Rust's external_terminal_permute_state applies MDS light INSIDE each round // So each external round does: add_rc_and_sbox, then mds_light_permutation // NOT: add_rc_and_sbox, then full MDS matrix for (0..4) |i| { @@ -563,7 +563,7 @@ pub fn poseidon2_24_plonky3_with_mds_light(state: []F, apply_mds_light: bool) vo for (state) |*elem| { elem.* = sbox(elem.*); } - // CRITICAL FIX: Apply MDS light (not full MDS matrix) - matching Rust's external_terminal_permute_state + // Apply MDS light (not full MDS matrix) - matching Rust's external_terminal_permute_state mds_light_permutation_24(state); } } diff --git a/src/prf/shake_prf_to_field.zig b/src/prf/shake_prf_to_field.zig index a897bc5..c7d5166 100644 --- a/src/prf/shake_prf_to_field.zig +++ b/src/prf/shake_prf_to_field.zig @@ -6,7 +6,7 @@ const crypto = std.crypto; const plonky3_field = @import("../poseidon2/plonky3_field.zig"); // Constants matching Rust implementation -// CRITICAL: Rust uses 16 bytes per FE (reads as u128), not 8! +// Rust uses 16 bytes per FE (reads as u128), not 8 const PRF_BYTES_PER_FE: usize = 16; const KEY_LENGTH: usize = 32; // 32 bytes const MESSAGE_LENGTH: usize = 32; // From Rust hash-sig diff --git a/src/signature/native/poseidon_top_level.zig b/src/signature/native/poseidon_top_level.zig index 23f909e..fc818f6 100644 --- a/src/signature/native/poseidon_top_level.zig +++ b/src/signature/native/poseidon_top_level.zig @@ -103,7 +103,7 @@ fn prepareLayerInfo(allocator: std.mem.Allocator, w: usize) !AllLayerInfoForBase const prev_info = all_info[v - 1]; for (0..max_d + 1) |d| { - // CRITICAL FIX: Use Rust's formula from Lemma 8 in eprint 2025/889 + // Use Rust's formula from Lemma 8 in eprint 2025/889 // The original simple loop was incorrect - it worked for Zig→Zig because both // sign and verify used the same wrong formula, but Rust→Zig failed. // Rust's test confirms the formula is correct. 
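Note: the shake_prf_to_field.zig hunk above preserves a layout fact that is easy to get wrong: the Rust side consumes 16 bytes of PRF output per field element, reading each chunk as a little-endian u128 before reducing, not 8 bytes. A sketch of that mapping, assuming std's Shake128 squeeze API; bytesToFieldElements and the PRF input string are illustrative stand-ins, not the library's names:

const std = @import("std");

const P: u128 = 0x7f000001; // KoalaBear prime, 2^31 - 2^24 + 1
const PRF_BYTES_PER_FE: usize = 16; // one u128 per element, matching the constant above

// Map PRF output bytes to field elements: 16 bytes per element,
// read as a little-endian u128, then reduced mod p.
fn bytesToFieldElements(prf_bytes: []const u8, out: []u32) void {
    std.debug.assert(prf_bytes.len >= out.len * PRF_BYTES_PER_FE);
    for (out, 0..) |*fe, i| {
        const chunk = prf_bytes[i * PRF_BYTES_PER_FE ..][0..PRF_BYTES_PER_FE];
        const wide = std.mem.readInt(u128, chunk, .little);
        fe.* = @intCast(wide % P);
    }
}

test "sixteen PRF bytes per field element" {
    var shake = std.crypto.hash.sha3.Shake128.init(.{});
    shake.update("key || epoch || chain index"); // stand-in for the real PRF input
    var buf: [4 * PRF_BYTES_PER_FE]u8 = undefined;
    shake.squeeze(&buf);
    var fes: [4]u32 = undefined;
    bytesToFieldElements(&buf, &fes);
    for (fes) |fe| try std.testing.expect(fe < P);
}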
@@ -313,16 +313,16 @@ pub fn mapToVertexBig( log.print("ZIG_MAP_VERTEX_DEBUG: START layer={} d_curr={} x_curr={s}\n", .{ layer, d_curr, x_curr_str }); for (1..DIMENSION) |i| { - // CRITICAL FIX: Initialize ji to a sentinel value matching Rust's usize::MAX + // Initialize ji to a sentinel value matching Rust's usize::MAX // Rust: let mut ji = usize::MAX; var ji: usize = std.math.maxInt(usize); const sub_dim = DIMENSION - i; - // CRITICAL FIX: Use saturating_sub to match Rust's behavior exactly + // Use saturating_sub to match Rust's behavior // Rust: range_start = d_curr.saturating_sub((w - 1) * (v - i)) const range_start = if (d_curr >= (BASE - 1) * sub_dim) d_curr - (BASE - 1) * sub_dim else 0; const sub_info = layer_data.get(sub_dim); - // CRITICAL FIX: Match Rust's loop exactly: for j in range_start..=min(w - 1, d_curr) + // Match Rust's loop: for j in range_start..=min(w - 1, d_curr) // Rust uses inclusive range: range_start..=min(w - 1, d_curr) const limit = @min(BASE - 1, d_curr); var j: usize = range_start; @@ -332,17 +332,16 @@ pub fn mapToVertexBig( defer self.allocator.free(x_curr_str_loop); log.print("ZIG_MAP_VERTEX_DEBUG: i={} d_curr={} range_start={} limit={} x_curr={s}\n", .{ i, d_curr, range_start, limit, x_curr_str_loop }); - // CRITICAL: Loop must be inclusive on both ends (j <= limit), matching Rust's ..= operator + // Loop must be inclusive on both ends (j <= limit), matching Rust's ..= operator while (j <= limit) : (j += 1) { const sub_layer = d_curr - j; - // CRITICAL FIX: In Rust, accessing out-of-bounds would panic. // For valid inputs, sub_layer should always be within bounds. // If it's out of bounds, it means our input is invalid, so we should return an error. if (sub_layer >= sub_info.sizes.len) { return error.InvalidHypercubeMapping; } const count = &sub_info.sizes[sub_layer]; - // CRITICAL FIX: Match Rust's comparison exactly: if x_curr >= *count + // Match Rust's comparison: if x_curr >= *count const cmp = x_curr.order(count.*); const count_str = try std.fmt.allocPrint(self.allocator, "{}", .{count.*.toConst()}); defer self.allocator.free(count_str); @@ -360,7 +359,7 @@ pub fn mapToVertexBig( } } - // CRITICAL FIX: Match Rust's assertion: assert!(ji < w) + // Match Rust's assertion: assert!(ji < w) // If ji is still the sentinel value, it means we never found a valid j if (ji >= BASE) return error.InvalidHypercubeMapping; @@ -385,7 +384,6 @@ pub fn mapIntoHypercubePart( final_layer: usize, field_elements: []const FieldElement, ) ![]u8 { - // CRITICAL DEBUG: Print immediately to verify function is called log.print("ZIG_HYPERCUBE_DEBUG: mapIntoHypercubePart CALLED DIMENSION={} BASE={} final_layer={} field_elements.len={}\n", .{ DIMENSION, BASE, final_layer, field_elements.len }); const layer_data = try getLayerData(self, BASE); @@ -403,7 +401,7 @@ pub fn mapIntoHypercubePart( // Reuse stderr from above (already declared at line 390) log.print("ZIG_HYPERCUBE_DEBUG: dom_size {s}\n", .{mod_str}); - // CRITICAL FIX: Match Rust's algorithm exactly + // Match Rust's algorithm exactly // Rust: acc = 0; for fe in field_elements: acc = acc * ORDER + fe; acc %= dom_size // We must build the full big integer first, then apply modulo (not during combination) var acc = try BigInt.initSet(self.allocator, 0); @@ -423,10 +421,10 @@ pub fn mapIntoHypercubePart( log.print("fe[{}]=0x{x:0>8} ", .{ i, fe.toCanonical() }); } // Build big integer: acc = acc * ORDER + fe (matching Rust exactly) - // CRITICAL FIX: Use BigInt.add with temporary BigInt for field element to match Rust's 
BigUint addition + // Use BigInt.add with temporary BigInt for field element to match Rust's BigUint addition try BigInt.mul(&tmp, &acc, &multiplier); // Create BigInt from field element (matching Rust's fe.as_canonical_biguint()) - // CRITICAL: Ensure we use the canonical value as u64 to match Rust's BigUint::from behavior + // Ensure we use the canonical value as u64 to match Rust's BigUint::from behavior const fe_canonical = fe.toCanonical(); try fe_bigint.set(@as(u64, fe_canonical)); try BigInt.add(&acc, &tmp, &fe_bigint); @@ -491,7 +489,6 @@ pub fn applyTopLevelPoseidonMessageHash( randomness: []const FieldElement, message: [32]u8, ) ![]u8 { - // CRITICAL DEBUG: Print all inputs to compare signing vs verification log.print("ZIG_POS_INPUTS: epoch={} parameter[0] (canonical)=0x{x:0>8} (Montgomery)=0x{x:0>8} randomness[0] (Montgomery)=0x{x:0>8} message[0..4]=", .{ epoch, parameter[0].toCanonical(), parameter[0].toMontgomery(), randomness[0].toMontgomery() }); for (0..@min(4, message.len)) |i| { log.print("0x{x:0>2} ", .{message[i]}); diff --git a/src/signature/native/scheme.zig b/src/signature/native/scheme.zig index abfed2f..089f917 100644 --- a/src/signature/native/scheme.zig +++ b/src/signature/native/scheme.zig @@ -283,7 +283,7 @@ fn serializePaddedLayer(layer: *const PaddedLayer, l: *std.ArrayList(u8), skip_p const nodes_offset: u32 = 12; // 8 + 4 = 12 try ssz.serialize(u32, nodes_offset, l); - // CRITICAL: For padded single-node roots, Rust only serializes the real node (not padding) + // For padded single-node roots, Rust only serializes the real node (not padding) // - If start_index is even (0), padding is at the back: serialize nodes[0] // - If start_index is odd (1), padding is at the front: serialize nodes[1] const nodes_to_serialize = if (skip_padding) blk: { @@ -985,8 +985,7 @@ pub const GeneralizedXMSSSignature = struct { if (serialized.len < rho_start + rho_size) return error.InvalidLength; var rho_canonical: [7]u32 = undefined; var rho_offset: usize = rho_start; - // CRITICAL FIX: ssz.deserialize doesn't work correctly for fixed-size arrays - // Deserialize each u32 individually instead + // ssz.deserialize doesn't work correctly for fixed-size arrays, deserialize each u32 individually for (0..rand_len_decode) |i| { if (serialized.len < rho_offset + 4) return error.InvalidLength; var val: u32 = undefined; @@ -1221,7 +1220,7 @@ pub const GeneralizedXMSSPublicKey = struct { // SSZ serialization methods pub fn sszEncode(self: *const GeneralizedXMSSPublicKey, l: *std.ArrayList(u8)) !void { // Convert root to canonical u32 array and serialize - // CRITICAL: Rust encodes root based on hash_len_fe, not always 8 u32s + // Rust encodes root based on hash_len_fe, not always 8 u32s // For lifetime 2^18 (hash_len_fe=7), encode as [7]u32 (28 bytes) // For lifetime 2^8/2^32 (hash_len_fe=8), encode as [8]u32 (32 bytes) const hash_len = self.hash_len_fe; @@ -1260,8 +1259,7 @@ pub const GeneralizedXMSSPublicKey = struct { if (root_size != 28 and root_size != 32) return error.InvalidLength; const hash_len: usize = if (root_size == 28) 7 else 8; - // CRITICAL FIX: ssz.deserialize doesn't work correctly for fixed-size arrays - // Deserialize each u32 individually instead + // ssz.deserialize doesn't work correctly for fixed-size arrays, deserialize each u32 individually var root_canonical: [8]u32 = undefined; var root_offset: usize = 0; for (0..hash_len) |i| { @@ -1282,8 +1280,7 @@ pub const GeneralizedXMSSPublicKey = struct { } // Decode parameter (20 bytes for 5 u32s) - starts after root 
- // CRITICAL FIX: ssz.deserialize doesn't work correctly for fixed-size arrays - // Deserialize each u32 individually instead + // ssz.deserialize doesn't work correctly for fixed-size arrays, deserialize each u32 individually const param_offset = root_size; // Parameter starts after root if (serialized.len < param_offset + 20) return error.InvalidLength; var param_canonical: [5]u32 = undefined; @@ -2001,7 +1998,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Runtime detection is available via simd_cpu.getSIMDWidth() but can't be used for types const SIMD_WIDTH = simd_utils.SIMD_WIDTH; - // CRITICAL OPTIMIZATION: Create Poseidon2SIMD instance once per thread and reuse it + // Create Poseidon2SIMD instance once per thread and reuse it // This matches Rust's approach: let chain_perm = poseidon2_16(); var simd_poseidon2 = poseidon2_simd.Poseidon2SIMD.init(ctx.scheme.allocator, ctx.scheme.poseidon2); @@ -2045,7 +2042,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { break :blk epochs; }; - // CRITICAL OPTIMIZATION: Use stack-allocated array instead of heap allocation + // Use stack-allocated array instead of heap allocation // This matches Rust's approach: let mut packed_chains: [[PackedF; HASH_LEN]; NUM_CHUNKS] // num_chains is always 64, hash_len is always 8, so [64][8]PackedF = ~2KB per thread (safe for stack) // OPTIMIZATION: Explicitly align for SIMD (16-byte for NEON/SSE, 32-byte for AVX-512) @@ -2110,8 +2107,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Walk all chains for all epochs in batch using SIMD const chain_length = ctx.scheme.lifetime_params.base; for (0..ctx.num_chains) |chain_index| { - // CRITICAL FIX: walkChainsSIMD does `for (0..chain_length - 1)`, so we need to pass - // the full chain_length (base) to get base - 1 steps, matching computeHashChainDomain + // walkChainsSIMD does `for (0..chain_length - 1)`, so pass full chain_length to get base - 1 steps ctx.scheme.walkChainsSIMD( &simd_poseidon2, // Pass reusable instance &packed_chains_slices, // Use stack-allocated slices @@ -2327,7 +2323,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } // Build bottom tree layers from leaf domains (shared with signing path) - // CRITICAL: Store layers so they can be reused during signing (major performance optimization!) 
+ // Store layers so they can be reused during signing if (profile_keygen) { leaf_time_ns = leaf_timer.read(); tree_timer = try std.time.Timer.start(); @@ -2561,7 +2557,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } /// Apply Poseidon2 chain tweak hash (matching Rust chain_tweak) - /// CRITICAL OPTIMIZATION: Uses stack allocation instead of heap allocation for hot path + /// Uses stack allocation instead of heap allocation for hot path pub inline fn applyPoseidonChainTweakHash( self: *GeneralizedXMSSSignatureScheme, input: [8]FieldElement, @@ -2581,7 +2577,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Only use hash_len_fe elements from input (7 for lifetime 2^18, 8 for lifetime 2^8) const hash_len = self.lifetime_params.hash_len_fe; - // CRITICAL: Ensure input[hash_len..8] is zero before using it + // Ensure input[hash_len..8] is zero before using it // Even though we only copy input[0..hash_len], we want to be explicit about this var sanitized_input: [8]FieldElement = input; @memset(sanitized_input[hash_len..8], FieldElement.zero()); @@ -2603,14 +2599,14 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Copy input (already in Montgomery form) - only hash_len_fe elements // Rust: single.iter() iterates over all HASH_LEN elements - // CRITICAL: Only use the first hash_len elements from input[8]FieldElement + // Only use the first hash_len elements from input[8]FieldElement // For lifetime 2^18: hash_len=7, so we only use input[0..7] // For lifetime 2^8/2^32: hash_len=8, so we use input[0..8] @memcpy(combined_input_slice[7 .. 7 + hash_len], sanitized_input[0..hash_len]); // OPTIMIZATION: Use direct Poseidon2-16 compression with stack-allocated state // This avoids the heap allocation in hashFieldElements16 - // CRITICAL: combined_input_slice values are already in Montgomery form (FieldElement.value) + // combined_input_slice values are already in Montgomery form // We need to convert them to the KoalaBearField type which also uses Montgomery form const poseidon2_root = @import("../../poseidon2/root.zig"); const F = poseidon2_root.Poseidon2KoalaBear16.Field; @@ -2639,7 +2635,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } // Return type is [8]FieldElement, so pad with zeros if hash_len < 8 - // CRITICAL: state[i].value is already in Montgomery form, so use it directly + // state[i].value is already in Montgomery form // Don't use toU32() which converts to canonical - we need Montgomery form var result: [8]FieldElement = undefined; for (0..hash_len) |i| { @@ -2720,7 +2716,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { const hash_len = self.lifetime_params.hash_len_fe; const CHAIN_COMPRESSION_WIDTH = 16; // 5 param + 2 tweak + 8 hash + 1 padding - // CRITICAL OPTIMIZATION: Use stack-allocated array instead of heap allocation + // Use stack-allocated array instead of heap allocation // This matches Rust's approach: let mut packed_input = [PackedF::ZERO; CHAIN_COMPRESSION_WIDTH]; // OPTIMIZATION: Align for SIMD (16-byte for NEON/SSE, 32-byte for AVX-512) const SIMD_WIDTH_LOCAL = simd_utils.SIMD_WIDTH; @@ -2764,7 +2760,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { const SIMD_WIDTH = simd_utils.SIMD_WIDTH; const field = @import("../../core/field.zig"); - // CRITICAL OPTIMIZATION: Use stack-allocated buffer for packed_next + // Use stack-allocated buffer for packed_next // hash_len is at most 8, so 8 PackedF = 128 bytes (safe for stack) // This avoids 64 chains × 7 steps = 448 allocations per batch! 
// OPTIMIZATION: Explicitly align for SIMD (16-byte for NEON/SSE, 32-byte for AVX-512) @@ -2921,10 +2917,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { if (build_opts.enable_debug_logs and parent_start == 0 and current_level <= 3) { log.debugPrint("ZIG_TREEBUILD_HASH: Bottom tree 0, level {}, i={}, parent_pos {}: left[0]=0x{x:0>8}, right[0]=0x{x:0>8}, parent[0]=0x{x:0>8}\n", .{ current_level, i, parent_pos, left_slice[0].value, right_slice[0].value, hash_result[0].value }); } - // CRITICAL DEBUG: For bottom tree 0, level 0, i=0, log when enabled - if (build_opts.enable_debug_logs and current_level == 0 and i == 0) { - log.debugPrint("ZIG_TREEBUILD_CRITICAL: level={}, i={}, parent_start={}, parent_pos={}, left[0]=0x{x:0>8}, right[0]=0x{x:0>8}, result[0]=0x{x:0>8}\n", .{ current_level, i, parent_start, parent_pos, left_slice[0].value, right_slice[0].value, hash_result[0].value }); - } // Debug: For bottom tree 0, level 1, i=0, log the result (should match node 0's result) if (build_opts.enable_debug_logs and parent_start == 0 and current_level == 1 and i == 0) { log.debugPrint("ZIG_TREEBUILD_HASH: Bottom tree 0, level 1, i=0, parent_pos {}: parent[0]=0x{x:0>8} (should match node 0's result)\n", .{ parent_pos, hash_result[0].value }); @@ -2979,7 +2971,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } // OPTIMIZATION: Compute tweaks and pack directly (avoid intermediate buffers) - // CRITICAL FIX: Convert tweak values to Montgomery form (matching scalar version which uses FieldElement.fromCanonical) + // Convert tweak values to Montgomery form (matching scalar version) var tweak_values_0: [SIMD_WIDTH]u32 = undefined; var tweak_values_1: [SIMD_WIDTH]u32 = undefined; for (0..actual_batch_size) |lane| { @@ -3032,7 +3024,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Debug output removed for performance // Use SIMD Poseidon2-24 compression - // CRITICAL OPTIMIZATION: Use stack-allocated buffer instead of heap allocation + // Use stack-allocated buffer instead of heap allocation // hash_len is at most 8, so 8 PackedF = 128 bytes (safe for stack) // OPTIMIZATION: Align for SIMD (16-byte for NEON/SSE, 32-byte for AVX-512) const align_bytes_results = if (SIMD_WIDTH == 8) 32 else 16; @@ -3517,7 +3509,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Only use hash_len_fe elements from each domain (7 for lifetime 2^18, 8 for lifetime 2^8) const hash_len = self.lifetime_params.hash_len_fe; const flattened_len = chain_domains_in.len * hash_len; - // CRITICAL OPTIMIZATION: Use stack allocation instead of heap + // Use stack allocation instead of heap // Max size: 64 chains × 8 elements = 512 FieldElements = 2KB (safe for stack) var flattened_input_stack: [512]FieldElement = undefined; const flattened_input = flattened_input_stack[0..flattened_len]; @@ -3581,7 +3573,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } // Extract capacity_value in Montgomery form (matching Rust's return type [F; OUT_LEN]) - // CRITICAL OPTIMIZATION: Use stack allocation instead of heap + // Use stack allocation instead of heap // CAPACITY is 9, so 9 elements = 36 bytes (safe for stack) var capacity_value_monty_stack: [9]F = undefined; const capacity_value_monty = capacity_value_monty_stack[0..CAPACITY]; @@ -3611,7 +3603,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Debug: print lengths log.print("ZIG_SPONGE_DEBUG: Lengths - parameter_len={}, tweak_len_fe={}, flattened_len={}, combined_input_len={}\n", .{ self.lifetime_params.parameter_len, 
self.lifetime_params.tweak_len_fe, flattened_len, combined_input_len });
-        // CRITICAL OPTIMIZATION: Use stack allocation instead of heap
+        // Use stack allocation instead of heap
         // Max size: 5 (param) + 2 (tweak) + 512 (flattened) = 519 elements = ~2KB (safe for stack)
         var combined_input_monty_stack: [600]F = undefined; // 600 to allow for padding
         const combined_input_monty = combined_input_monty_stack[0..combined_input_len];
@@ -3659,7 +3651,7 @@ pub const GeneralizedXMSSSignatureScheme = struct {
         const input_remainder = combined_input_monty.len % RATE;
         const extra_elements = if (input_remainder == 0) 0 else (RATE - input_remainder) % RATE;
         const padded_input_len = combined_input_monty.len + extra_elements;
-        // CRITICAL OPTIMIZATION: Use stack allocation instead of heap
+        // Use stack allocation instead of heap
         // Max size: 519 + 15 (max padding) = 534 elements = ~2KB (safe for stack)
         var padded_input_stack: [600]F = undefined;
         const padded_input = padded_input_stack[0..padded_input_len];
@@ -3679,7 +3671,7 @@ pub const GeneralizedXMSSSignatureScheme = struct {
         for (0..RATE) |i| {
             state[i] = F.zero; // Zero in Montgomery is still zero
         }
-        // CRITICAL: state[RATE + i] = capacity_value_monty[i] means:
+        // state[RATE + i] = capacity_value_monty[i]
         // state[15 + 0] = capacity_value[0] -> state[15] = capacity_value[0] ✓
         // state[15 + 1] = capacity_value[1] -> state[16] = capacity_value[1] ✓
         // ...
@@ -3854,7 +3846,6 @@ pub const GeneralizedXMSSSignatureScheme = struct {
         }

         // STEP 2: Create domain separator (same for all epochs)
-        // CRITICAL FIX: Compute using scalar then verify SIMD produces same result
         // Rust computes this with PackedF, but since all lanes have same value, scalar+broadcast should be equivalent
         // However, to match Rust exactly, we should compute with SIMD
         const DOMAIN_PARAMETERS_LENGTH: usize = 4;
@@ -3886,7 +3877,5 @@ pub const GeneralizedXMSSSignatureScheme = struct {
         }

         // Pack capacity value to all SIMD lanes (same for all epochs)
-        // CRITICAL: Even though Rust computes with PackedF, scalar+broadcast should be equivalent
-        // since all lanes have the same value. But let's verify this is correct.
var packed_capacity_value: [9]simd_utils.PackedF = undefined; for (0..CAPACITY) |i| { @@ -3958,7 +3948,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Add chunk to rate part of state (SIMD addition with modular reduction) // OPTIMIZED: Batch process additions with SIMD modular reduction - // CRITICAL: Must use modular addition to match Rust's field addition behavior + // Use modular addition to match Rust's field addition behavior // Rust's PackedF addition uses field addition which includes modular reduction // Algorithm: sum = a +% b; if sum >= p then sum -= p // In Montgomery form, values are in [0, 2p), so after addition they're in [0, 4p) @@ -4042,7 +4032,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } // Start with the lowest layer, padded accordingly (matching Rust HashTreeLayer::padded) - // CRITICAL: Use dummy RNG for bottom trees (matching Rust implementation) + // Use dummy RNG for bottom trees (matching Rust implementation) // Rust uses StdRng::seed_from_u64(0) for bottom trees because they're full and padding is removed // This allows parallel building without affecting RNG determinism var dummy_rng = std.Random.DefaultPrng.init(0); @@ -4103,7 +4093,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { current_level = next_level; } - // CRITICAL: Truncate to final_depth = 4 layers like Rust does + // Truncate to final_depth = 4 layers like Rust does // Rust truncates to depth/2 = 4 layers and gets root from layer 4 // According to Rust: bottom_tree_root = bottom_tree.layers[depth / 2].nodes[bottom_tree_index % 2] // where depth = 8, so depth/2 = 4, and we need the root from layer 4 @@ -4235,7 +4225,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { for (1..8) |j| leaf_nodes[i][j] = FieldElement{ .value = 0 }; } - // CRITICAL: Use dummy RNG for bottom trees (matching Rust implementation) + // Use dummy RNG for bottom trees (matching Rust implementation) // Rust uses StdRng::seed_from_u64(0) for bottom trees because they're full and padding is removed // This allows parallel building without affecting RNG determinism var dummy_rng = std.Random.DefaultPrng.init(0); @@ -4287,13 +4277,13 @@ pub const GeneralizedXMSSSignatureScheme = struct { layers.deinit(); } - // CRITICAL: Use dummy RNG for bottom trees (matching Rust implementation) + // Use dummy RNG for bottom trees (matching Rust implementation) // Rust uses StdRng::seed_from_u64(0) for bottom trees because they're full and padding is removed // This allows parallel building without affecting RNG determinism var dummy_rng = std.Random.DefaultPrng.init(0); const dummy_rng_random = dummy_rng.random(); - // CRITICAL OPTIMIZATION: Pass leaf_nodes_in directly to padLayer + // Pass leaf_nodes_in directly to padLayer // padLayer allocates its own array and copies the input, so we don't need an intermediate copy var current_layer = try self.padLayerWithRng(leaf_nodes_in, start_index, &dummy_rng_random); try layers.append(.{ .nodes = try self.allocator.alloc([8]FieldElement, current_layer.nodes.len), .start_index = current_layer.start_index }); @@ -4310,7 +4300,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { var current_level: usize = 0; while (current_level < full_depth) : (current_level += 1) { - // CRITICAL FIX: Use current_layer.start_index (which is the previous layer at start of iteration) // This matches Rust's behavior: prev = &layers[level - lowest_layer] // At the start of each iteration, current_layer is the previous layer const prev_layer_start_index = current_layer.start_index; @@ 
-4367,7 +4356,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { const parents_len = current_layer.nodes.len / 2; const parents = try self.allocator.alloc([8]FieldElement, parents_len); - // CRITICAL DEBUG: For first top tree level (current_level=16), log the first hash inputs if (current_level == lowest_layer and parents_len > 0) { log.debugPrint("ZIG_KEYGEN_DEBUG: First top tree level (level={}): hashing nodes[0] and nodes[1] with parent_start={}, parent_pos={}\n", .{ current_level, parent_start, parent_start + 0 }); log.debugPrint("ZIG_KEYGEN_DEBUG: nodes[0][0]=0x{x:0>8} (root of bottom tree {})\n", .{ current_layer.nodes[0][0].value, start_index }); @@ -4800,7 +4788,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { activation_epoch: usize, num_active_epochs: usize, ) !KeyGenResult { - // CRITICAL: Rust leansig library multiplies num_active_epochs by 128 internally + // Rust leansig library multiplies num_active_epochs by 128 internally // To match Rust's behavior exactly, we multiply by 128 here // Example: Input 1024 -> Rust stores 131072 (1024 * 128) in SSZ const rust_compatible_num_active_epochs = num_active_epochs * 128; @@ -4855,7 +4843,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { const expanded_activation_epoch = activation_epoch; const expanded_num_active_epochs = num_active_epochs; - // CRITICAL: Consume RNG state only if it hasn't been consumed yet. + // Consume RNG state only if it hasn't been consumed yet // When called from keyGen(), the RNG state is already after PRF key generation (32 bytes consumed). // When called directly, we need to consume 32 bytes to match that state. if (!rng_already_consumed) { @@ -5017,7 +5005,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { } } - // CRITICAL: Verify all trees were built and roots stored // This ensures we don't proceed with incomplete data for (2..num_bottom_trees) |i| { if (trees[i] == null) { @@ -5083,7 +5070,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { const left_bottom_tree = trees[0] orelse return error.InvalidBottomTree; const right_bottom_tree = if (num_bottom_trees > 1) trees[1] orelse return error.InvalidBottomTree else return error.InsufficientBottomTrees; - // CRITICAL: Verify that the bottom tree roots stored in trees match the roots used to build the top tree // This ensures consistency - if they don't match, the signature's Merkle path won't verify correctly const left_tree_root = left_bottom_tree.root(); const right_tree_root = right_bottom_tree.root(); @@ -5184,7 +5170,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { } log.print("\n", .{}); - // CRITICAL DEBUG: Print parameter BEFORE creating secret key log.print("ZIG_KEYGEN_DEBUG: Parameter passed to secret_key.init (canonical): ", .{}); for (0..5) |i| { log.print("0x{x:0>8} ", .{parameter[i].toCanonical()}); @@ -5208,7 +5193,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { right_bottom_tree, ); - // CRITICAL: Compute chain domain for epoch 1, chain 0 to verify it matches what we'll get during signing if (build_opts.enable_debug_logs) { const keygen_prf_domain = self.prfDomainElement(prf_key, 1, 0); const keygen_chain_domain = try self.computeHashChainDomain(keygen_prf_domain, 1, 0, parameter); @@ -5218,7 +5202,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { log.print(", parameter[0]=0x{x:0>8} (canonical: 0x{x:0>8})\n", .{ parameter[0].value, parameter[0].toCanonical() }); } - // CRITICAL DEBUG: Verify the parameter is correctly stored in the secret key log.print("ZIG_KEYGEN_DEBUG: Secret key 
parameter after init (canonical): ", .{}); for (0..5) |i| { log.print("0x{x:0>8} ", .{secret_key.parameter[i].toCanonical()}); @@ -5274,7 +5257,7 @@ pub const GeneralizedXMSSSignatureScheme = struct { } // Generate Merkle path via combined bottom+top tree layers - // CRITICAL OPTIMIZATION: Reuse pre-computed bottom trees instead of rebuilding! + // Reuse pre-computed bottom trees instead of rebuilding // Rust doesn't rebuild bottom trees during signing - it reuses the stored trees const leafs_per_bottom_tree = @as(usize, 1) << @intCast(self.lifetime_params.log_lifetime / 2); const bottom_tree_index = @as(usize, @intCast(epoch)) / leafs_per_bottom_tree; @@ -5296,14 +5279,12 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Debug: log number of bottom layers (reused from stored tree) log.print("ZIG_SIGN_DEBUG: Reusing {} bottom tree layers from stored tree (bottom_tree_index={})\n", .{ bottom_layers.len, bottom_tree_index }); - // CRITICAL VERIFICATION: For epoch 0, verify that the bottom tree root from stored layers matches expected if (epoch == 0 and bottom_tree_index == 0) { const root_layer = bottom_layers[bottom_layers.len - 1]; if (root_layer.nodes.len > 0) { const stored_root = root_layer.nodes[0]; log.print("ZIG_SIGN_DEBUG: Bottom tree 0 root from stored layers[0]=0x{x:0>8}\n", .{stored_root[0].value}); } - // CRITICAL: Extract leaf domain for epoch 1 from stored tree layers // The first layer should contain the leaf domains if (bottom_layers.len > 0) { const leaf_layer = bottom_layers[0]; @@ -5328,7 +5309,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { const bottom_copath = try self.computePathFromLayers(bottom_layers, epoch); defer self.allocator.free(bottom_copath); - // CRITICAL: For epoch 0, verify that the first path node matches the stored leaf domain for epoch 1 if (epoch == 0 and bottom_copath.len > 0 and bottom_tree_index == 0) { const first_path_node = bottom_copath[0]; if (bottom_layers.len > 0) { @@ -5353,7 +5333,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { // left_bottom_tree_index already declared above const top_pos = @as(u32, @intCast(bottom_tree_index)); - // CRITICAL: Verify that top_layers[0].start_index matches left_bottom_tree_index // This ensures the path computation uses the correct offset if (top_layers.len > 0 and top_layers[0].start_index != left_bottom_tree_index) { log.debugPrint("ZIG_SIGN_ERROR: Top tree layer start_index mismatch! top_layers[0].start_index={}, left_bottom_tree_index={}\n", .{ top_layers[0].start_index, left_bottom_tree_index }); @@ -5369,7 +5348,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { const top_copath = try self.computePathFromLayers(top_layers, top_pos); defer self.allocator.free(top_copath); - // CRITICAL DEBUG: Log the top tree path nodes for epoch 0 to compare with verification if (epoch == 0) { log.debugPrint("ZIG_SIGN_DEBUG: Top tree path for epoch 0 (top_pos={}, left_bottom_tree_index={}):\n", .{ top_pos, left_bottom_tree_index }); for (top_copath, 0..) |node, i| { @@ -5389,7 +5367,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { } } - // CRITICAL DEBUG: For epoch 0, log bottom tree path nodes to compare with verification if (epoch == 0) { log.debugPrint("ZIG_SIGN_DEBUG: Bottom tree path for epoch 0: {} nodes\n", .{bottom_copath.len}); for (bottom_copath, 0..) 
|node, i| {
@@ -5451,7 +5428,6 @@ pub const GeneralizedXMSSSignatureScheme = struct {
         log.print("\n", .{});

         // Debug: print parameter used for encoding (for Zig→Zig debugging, only for successful attempt)
-        // CRITICAL: Print parameter BEFORE calling deriveTargetSumEncoding
         log.print("ZIG_SIGN_DEBUG: secret_key.parameter BEFORE encoding (canonical): ", .{});
         for (0..5) |i| {
             log.print("0x{x:0>8} ", .{secret_key.parameter[i].toCanonical()});
@@ -5577,7 +5553,5 @@ pub const GeneralizedXMSSSignatureScheme = struct {

             // Also print what position in chain this represents
             log.debugPrint("ZIG_SIGN_DEBUG: Chain {} stored at position {} in chain (walked {} steps from position 0)\n", .{ chain_index, steps, steps });
-            // CRITICAL VERIFICATION: For chain 0, verify that continuing the walk from position steps to base_minus_one
-            // gives the same result as computeHashChainDomain would produce
             log.print("ZIG_SIGN_VERIFY_START: chain_index={}, epoch={}, steps={}\n", .{ chain_index, epoch, steps });
             if (epoch == 0) {
@@ -5742,7 +5717,6 @@ pub const GeneralizedXMSSSignatureScheme = struct {
             }
         }

-        // CRITICAL VERIFICATION: For epoch 0, verify that the first path node matches the leaf domain for epoch 1
         if (epoch == 0) {
             log.debugPrint("ZIG_SIGN_VERIFY_START: Epoch 0 verification block entered\n", .{});
             // First, verify that PRF key and parameter match what was used during tree building
@@ -5751,7 +5725,6 @@ pub const GeneralizedXMSSSignatureScheme = struct {
             log.debugPrint("\n", .{});
             log.debugPrint("ZIG_SIGN_VERIFY: secret_key.parameter[0]=0x{x:0>8} (canonical: 0x{x:0>8})\n", .{ secret_key.parameter[0].value, secret_key.parameter[0].toCanonical() });

-            // CRITICAL TEST: Compute PRF domain element for epoch 1, chain 0 and compare
             const test_prf_domain = self.prfDomainElement(secret_key.prf_key, 1, 0);
             log.debugPrint("ZIG_SIGN_VERIFY: PRF domain for epoch 1, chain 0: domain[0]=0x{x:0>8}, prf_key[0..8]=", .{test_prf_domain[0]});
             for (secret_key.prf_key[0..8]) |b| log.debugPrint("{x:0>2}", .{b});
@@ -5761,7 +5734,6 @@ pub const GeneralizedXMSSSignatureScheme = struct {

             const test_chain_domain = try self.computeHashChainDomain(test_prf_domain, 1, 0, secret_key.parameter);
             log.debugPrint("ZIG_SIGN_VERIFY: Chain domain for epoch 1, chain 0: chain_domain[0]=0x{x:0>8}\n", .{test_chain_domain[0].value});
-            // CRITICAL: Compare with expected value from tree building
             // Note: The stored trees may have been built with old code, so this check may fail
             // even though the chain domains match during keygen and signing
             log.debugPrint("ZIG_SIGN_VERIFY: Chain domain for epoch 1, chain 0: chain_domain[0]=0x{x:0>8}\n", .{test_chain_domain[0].value});
@@ -5775,7 +5747,6 @@ pub const GeneralizedXMSSSignatureScheme = struct {
         // These logs have been removed as they were only useful during manual investigations.
} - // CRITICAL VERIFICATION: For epoch 0, compute leaf domain from scratch and compare with what verification would produce if (epoch == 0) { var scratch_chain_domains = try self.allocator.alloc([8]FieldElement, self.lifetime_params.dimension); defer self.allocator.free(scratch_chain_domains); @@ -6302,7 +6273,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { const bottom_tree_depth = self.lifetime_params.log_lifetime / 2; log.print("ZIG_VERIFY_DEBUG: Starting Merkle path walk from epoch {} with {} nodes (bottom_tree_index={} top_pos={} bottom_depth={})\n", .{ epoch, nodes.len, bottom_tree_index, top_pos, bottom_tree_depth }); - // CRITICAL: For epoch 0, log the first path node (sibling, which should be leaf domain for epoch 1) if (epoch == 0 and nodes.len > 0) { const first_path_node = nodes[0]; log.debugPrint("ZIG_VERIFY_DEBUG: First path node[0] (sibling of epoch 0, should be epoch 1 leaf): 0x{x:0>8}\n", .{first_path_node[0].value}); @@ -6335,7 +6305,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { // Determine new position (position of the parent) - shift BEFORE computing tweak (matching Rust) // pos_in_level is the position of the parent in its level, which is position / 2 - // CRITICAL: pos_in_level should be computed from original_position (before shifting), not after // During tree building, pos_in_level = parent_start + i, where parent_start = start_index >> 1 // For the first level, start_index = 0, so parent_start = 0, and pos_in_level = i (0, 1, 2, ...) // During verification, original_position is the position of the current node, and pos_in_level should be original_position / 2 @@ -6363,13 +6332,10 @@ pub const GeneralizedXMSSSignatureScheme = struct { } log.debugPrint("\n", .{}); log.debugPrint("ZIG_VERIFY_DEBUG: For first top tree hash, level={}, pos_in_level={} (should match tree building: level=16, pos=0)\n", .{ level, pos_in_level }); - // CRITICAL: Also log the hash inputs for the first top tree hash log.debugPrint("ZIG_VERIFY_DEBUG: First top tree hash inputs: left[0]=0x{x:0>8}, right[0]=0x{x:0>8}, level={}, pos_in_level={}, param[0]=0x{x:0>8}\n", .{ left_slice[0].value, right_slice[0].value, level, pos_in_level, public_key.parameter[0].value }); - // CRITICAL: Compare with expected root from tree building log.debugPrint("ZIG_VERIFY_ERROR: Bottom tree 0 root mismatch! Computed=0x{x:0>8}, Expected=0x50175e49\n", .{current_domain[0].value}); } - // CRITICAL DEBUG: Log top tree path nodes for epoch 0 to compare with signing if (epoch == 0 and is_top_tree) { log.debugPrint("ZIG_VERIFY_DEBUG: Top tree node {} (epoch 0): position={}, pos_in_level={}, sibling[0]=0x{x:0>8}\n", .{ node_idx, position, pos_in_level, sibling_domain[0].value }); } @@ -6405,31 +6371,11 @@ pub const GeneralizedXMSSSignatureScheme = struct { for (right_slice) |fe| log.print("0x{x:0>8} ", .{fe.value}); log.print("\n", .{}); } - // CRITICAL DEBUG: For first top tree node (epoch 0), log the hash inputs to compare with tree building if (epoch == 0 and is_top_tree and node_idx == bottom_tree_depth) { log.debugPrint("ZIG_VERIFY_DEBUG: First top tree hash call (epoch 0): level={}, pos_in_level={}, left[0]=0x{x:0>8}, right[0]=0x{x:0>8}\n", .{ level, pos_in_level, left_slice[0].value, right_slice[0].value }); log.debugPrint("ZIG_VERIFY_DEBUG: current_domain[0]=0x{x:0>8}, sibling[0]=0x{x:0>8}, is_left={}\n", .{ current_domain[0].value, sibling_domain[0].value, is_left }); } - // CRITICAL FIX: Match Rust's hash_tree_verify implementation - // In Rust: - // 1. 
Initial hash: tree_tweak(0, position) → hashes chain_ends (64 domains) to get current_node - // 2. Loop iteration 0 (l=0): tree_tweak(1, current_position) → hashes [current_node, co_path[0]] - // 3. Loop iteration 1 (l=1): tree_tweak(2, current_position) → hashes [current_node, co_path[1]] - // In our code: - // 1. reduceChainDomainsToLeafDomain (sponge hash) → produces leaf_domain (single domain) - // 2. Node 0: should match Rust's iteration 0, which uses level=1 - // 3. Node 1: should match Rust's iteration 1, which uses level=2 - // Our hash function adds 1 internally, so: - // Node 0: use level=0 → becomes tweak_level=1 (matches Rust's level=1) ✓ - // Node 1: use level=1 → becomes tweak_level=2 (matches Rust's level=2) ✓ - // But wait - Rust's initial hash uses level=0, and our reduceChainDomainsToLeafDomain also uses level=0. - // So our leaf_domain should already be the result of the initial hash. - // Then node 0 should use level=1 (which becomes tweak_level=2) to match Rust's iteration 0 (level=1). - // But that would give tweak_level=2, not tweak_level=1. - // Actually, let me check: Rust's iteration 0 uses level=1, and our hash function adds 1, so we should use level=0 to get tweak_level=1. - // But Rust uses level=1 directly, so tweak_level=1. So we should use level=0 to get tweak_level=1. ✓ - // So the fix is correct: use level = node_idx const hash_level = level; const parent = try self.applyPoseidonTreeTweakHashWithSeparateInputs(left_slice, right_slice, hash_level, pos_in_level, public_key.parameter); defer self.allocator.free(parent); @@ -6439,14 +6385,12 @@ pub const GeneralizedXMSSSignatureScheme = struct { log.debugPrint("ZIG_VERIFY_DEBUG: First bottom tree hash (epoch 0): left[0]=0x{x:0>8} (Montgomery) / 0x{x:0>8} (Canonical), right[0]=0x{x:0>8} (Montgomery) / 0x{x:0>8} (Canonical), level={}, hash_level={}, tweak_level={}, pos_in_level={}, param[0]=0x{x:0>8} (Montgomery) / 0x{x:0>8} (Canonical), parent[0]=0x{x:0>8} (Montgomery) / 0x{x:0>8} (Canonical)\n", .{ left_slice[0].value, left_slice[0].toCanonical(), right_slice[0].value, right_slice[0].toCanonical(), level, hash_level, hash_level + 1, pos_in_level, public_key.parameter[0].value, public_key.parameter[0].toCanonical(), parent[0].value, parent[0].toCanonical() }); } - // CRITICAL DEBUG: For first top tree hash (epoch 0), log the result and tweak if (epoch == 0 and is_top_tree and node_idx == bottom_tree_depth) { const tweak_level = level + 1; const tweak_bigint = (@as(u128, tweak_level) << 40) | (@as(u128, pos_in_level) << 8) | 0x01; log.debugPrint("ZIG_VERIFY_DEBUG: First top tree hash result[0]: 0x{x:0>8}, tweak_level={}, pos_in_level={}, tweak=0x{x}\n", .{ parent[0].value, tweak_level, pos_in_level, tweak_bigint }); } - // CRITICAL DEBUG: For epoch 0, log bottom tree path walk to find where it goes wrong if (epoch == 0 and is_bottom_tree) { log.debugPrint("ZIG_VERIFY_DEBUG: Bottom tree node {} (epoch 0): level={}, pos_in_level={}, current[0]=0x{x:0>8}, sibling[0]=0x{x:0>8}, parent[0]=0x{x:0>8}, is_left={}\n", .{ node_idx, level, pos_in_level, current_domain[0].value, sibling_domain[0].value, parent[0].value, is_left }); // For the first node, also log the hash inputs in detail (after hash_level is computed) @@ -6463,7 +6407,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { } } - // CRITICAL DEBUG: For first top tree node (epoch 0), log the hash result if (epoch == 0 and is_top_tree and node_idx == bottom_tree_depth) { log.debugPrint("ZIG_VERIFY_DEBUG: First top tree hash result (epoch 0): 
parent[0]=0x{x:0>8}\n", .{parent[0].value}); } @@ -6494,7 +6437,6 @@ pub const GeneralizedXMSSSignatureScheme = struct { if (epoch == 16) { log.print("ZIG_VERIFY: Epoch {} - After level {}: current_domain[0]=0x{x:0>8} (original_position={}, is_left={}, pos_in_level={})\n", .{ epoch, level, current_domain[0].value, original_position, is_left, pos_in_level }); } - // CRITICAL: For epoch 0, after bottom tree walk (node 15), compare with expected root if (epoch == 0 and node_idx == 15) { log.debugPrint("ZIG_VERIFY_DEBUG: After bottom tree walk (node 15): current_domain[0]=0x{x:0>8}, expected root=0x50175e49\n", .{current_domain[0].value}); if (current_domain[0].value != 0x50175e49) { diff --git a/src/signature/serialization.zig b/src/signature/serialization.zig index c42ffb5..068f424 100644 --- a/src/signature/serialization.zig +++ b/src/signature/serialization.zig @@ -361,7 +361,7 @@ pub fn deserializePublicKey(json_str: []const u8) !GeneralizedXMSSPublicKey { } // Parse parameter - // CRITICAL: serializeFieldElement uses toCanonical(), so the JSON file contains canonical values. + // serializeFieldElement uses toCanonical(), so the JSON file contains canonical values // We must read them as canonical and convert to Montgomery internally via parseFieldElementFromJsonValue. const param_array = obj.get("parameter") orelse return error.MissingParameterField; if (param_array != .array or param_array.array.items.len != 5) return error.InvalidJsonFormat; @@ -464,7 +464,7 @@ pub fn deserializeSecretKeyData(allocator: Allocator, json_str: []const u8) !Des }; // Parse parameter - // CRITICAL: serializeFieldElement uses toCanonical(), so the JSON file contains canonical values. + // serializeFieldElement uses toCanonical(), so the JSON file contains canonical values // We must read them as canonical and convert to Montgomery internally via parseFieldElementFromJsonValue. const param_array = obj.get("parameter") orelse return error.MissingParameterField; if (param_array != .array or param_array.array.items.len != 5) return error.InvalidJsonFormat; diff --git a/src/wots/winternitz.zig b/src/wots/winternitz.zig index b19ee50..6e7c741 100644 --- a/src/wots/winternitz.zig +++ b/src/wots/winternitz.zig @@ -174,7 +174,7 @@ pub const WinternitzOTS = struct { // Allocate and ZERO the combined buffer to ensure deterministic results // Without zeroing, uninitialized memory causes non-deterministic hashes with Arena allocators var combined = try allocator.alloc(u8, public_parts.len * hash_output_len); - @memset(combined, 0); // CRITICAL: Zero the buffer first + @memset(combined, 0); // Zero the buffer first defer allocator.free(combined); for (public_parts, 0..) |part, i| { @@ -264,7 +264,7 @@ pub const WinternitzOTS = struct { // Allocate and ZERO the combined buffer to ensure deterministic results var combined = try allocator.alloc(u8, public_parts.len * hash_output_len); - @memset(combined, 0); // CRITICAL: Zero the buffer first + @memset(combined, 0); // Zero the buffer first defer allocator.free(combined); for (public_parts, 0..) |part, i| {
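Note: taken together, the remote_hash_tool.zig comments above pin down the bincode layout both sides must agree on: a Vec<FieldArray> is a u64 little-endian length followed by the arrays back to back, and each fixed-size FieldArray contributes exactly hash_len canonical u32 values with no per-array length prefix. A sketch of a reader for that layout, assuming the pre-0.15 std.io stream API; readFieldArrayVec is an illustrative helper, not the tool's actual code:

const std = @import("std");

// Read a bincode Vec<FieldArray>: u64 LE length prefix, then that many arrays
// of exactly hash_len canonical u32 values each (no per-array length prefix).
// hash_len is 8 for lifetimes 2^8/2^32 and 7 for lifetime 2^18.
fn readFieldArrayVec(
    allocator: std.mem.Allocator,
    reader: anytype,
    comptime max_hash_len: usize,
    hash_len: usize,
) ![][max_hash_len]u32 {
    const len = try reader.readInt(u64, .little); // Vec length prefix
    const arrays = try allocator.alloc([max_hash_len]u32, @intCast(len));
    errdefer allocator.free(arrays);
    for (arrays) |*arr| {
        @memset(arr, 0); // zero the unused tail when hash_len < max_hash_len
        for (arr[0..hash_len]) |*elem| {
            elem.* = try reader.readInt(u32, .little); // canonical form
        }
    }
    return arrays;
}

test "round-trip a two-node path with hash_len = 8" {
    var buf: [8 + 2 * 8 * 4]u8 = undefined;
    var fbs = std.io.fixedBufferStream(&buf);
    const w = fbs.writer();
    try w.writeInt(u64, 2, .little); // Vec length
    for (0..2 * 8) |i| try w.writeInt(u32, @intCast(i), .little);

    fbs.reset();
    const nodes = try readFieldArrayVec(std.testing.allocator, fbs.reader(), 8, 8);
    defer std.testing.allocator.free(nodes);
    try std.testing.expectEqual(@as(u32, 8), nodes[1][0]);
}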