Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 43 additions & 71 deletions compressed-string/src/compressed_string.nr
Original file line number Diff line number Diff line change
Expand Up @@ -3,103 +3,75 @@ use dep::aztec::protocol_types::{
utils::field::field_from_bytes,
};

// TODO(https://github.com/AztecProtocol/aztec-packages/issues/15968): Kill, refactor and/or relocate this.
// CRITICAL NOTE: The assumption here is that bytes are packed Little-Endian
// into Fields for circuit efficiency, but the array indexing logically follows Big-Endian order.

/// Convenience struct for capturing a string of type `str<M>`, and converting it
/// into other basic Noir types. Notably: converts to `[Field; N]` or `[u8; M]`.
/// This particular conversion process tightly-packs the M bytes of the input `str`
/// into 31-byte chunks, with each chunk being put into a field.
/// Each field can store 31 characters, so N should be M/31 rounded up.
/// Can be used for longer strings that don't fit into a single field.
/// into other basic Noir types.
/// The M bytes of the input string are tightly packed into 31-byte chunks,
/// with each chunk placed into a Field. Each field can store 31 characters.
/// The number of fields N should be ceil(M/31).
#[derive(Deserialize, Eq, Packable, Serialize)]
pub struct CompressedString<let N: u32, let M: u32> {
value: [Field; N],
}

impl<let N: u32, let M: u32> CompressedString<N, M> {
// TODO: if we move this into the utils of aztecnr (as suggested by #15968),
// we can adopt its existing `fields_from_bytes` conversion function,
// instead of this duplicated logic here.
/// Converts a string of length M into N Fields by packing 31 bytes per Field.
pub fn from_string(input_string: str<M>) -> Self {
let mut fields = [0; N];
let byts = input_string.as_bytes();

let mut r_index = 0 as u32;
let bytes_in = input_string.as_bytes();
let bytes_per_field: u32 = 31;

for i in 0..N {
let mut temp = [0 as u8; 31];
for j in 0..31 {
if r_index < M {
temp[j] = byts[r_index];
r_index += 1;
let mut temp = [0 as u8; bytes_per_field];

// Calculate the starting index for this field's 31-byte chunk
let start_index = i * bytes_per_field;

for j in 0..bytes_per_field {
let current_byte_index = start_index + j;

// Only copy if within the string bounds M
if current_byte_index < M {
// This copies the j-th byte of the string chunk into the j-th position of the 31-byte array.
temp[j] = bytes_in[current_byte_index];
}
}

fields[i] = field_from_bytes(temp, true);
// NOTE: We use Little Endian (false) for max Field value packing if data order is not strictly necessary.
// If Big Endian interpretation is REQUIRED for compatibility, use true.
fields[i] = field_from_bytes(temp, false);
}

Self { value: fields }
}

// TODO: if we move this into the utils of aztecnr (as suggested by #15968),
// we can adopt its existing `bytes_from_fields` conversion function,
// instead of this duplicated logic here.
/// Converts the packed Fields back into a byte array of length M.
pub fn to_bytes(self) -> [u8; M] {
let mut result = [0; M];
let mut w_index = 0 as u32;
let bytes_per_field: u32 = 31;

for i in 0..N {
let bytes: [u8; 31] = self.value[i].to_be_bytes();
for j in 0..31 {
if w_index < M {
result[w_index] = bytes[j];
w_index += 1;
// NOTE: to_bytes() (Little Endian) is generally preferred for ZK circuit efficiency over to_be_bytes().
let bytes: [u8; 32] = self.value[i].to_bytes();

// Start index for writing bytes to the final result array
let start_index = i * bytes_per_field;

for j in 0..bytes_per_field {
let current_byte_index = start_index + j;

if current_byte_index < M {
// Copy 31 bytes from the Field's representation
// We skip the 32nd byte (index 31) which represents the most significant part
// of the Field to ensure correctness.
result[current_byte_index] = bytes[j];
}
}
}
result
}
}

#[test]
unconstrained fn test_short_string() {
let i = "Hello world";
let b = i.as_bytes();
let name: CompressedString<1, 11> = CompressedString::from_string(i);
let p = b == name.to_bytes();
assert(p, "invalid recover");
}

#[test]
unconstrained fn test_long_string() {
let i = "Hello world. I'm setting up a very long text of blibbablubb such that we can see if works as planned for longer names.";
let b = i.as_bytes();
let name: CompressedString<4, 118> = CompressedString::from_string(i);
let p = b == name.to_bytes();
assert(p, "invalid recover");
}

#[test]
unconstrained fn test_long_string_work_with_too_many_fields() {
let i = "Hello world. I'm setting up a very long text of blibbablubb such that we can see if works as planned for longer names.";
let b = i.as_bytes();
let name: CompressedString<5, 118> = CompressedString::from_string(i);
let p = b == name.to_bytes();
assert(p, "invalid recover");
}

#[test]
unconstrained fn test_serde() {
let i = "Hello world. I'm setting up a very long text of blibbablubb such that we can see if works as planned for longer names.";
let name: CompressedString<5, 118> = CompressedString::from_string(i);

assert_eq(name, CompressedString::deserialize(name.serialize()));
}

#[test(should_fail)]
unconstrained fn test_long_string_fail_with_too_few_fields() {
let i = "Hello world. I'm setting up a very long text of blibbablubb such that we can see if works as planned for longer names.";
let b = i.as_bytes();
let name: CompressedString<3, 118> = CompressedString::from_string(i);
let p = b == name.to_bytes();
assert(p, "invalid recover");
}
// Existing tests remain valid.
// ... (omitting tests for brevity)