Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue#622: change usize to u64 #641

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion filecoin-proofs/examples/beacon-post.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ fn do_the_work(
let sp = SetupParams::<PedersenDomain, vdf_sloth::Sloth> {
vdf_post_setup_params: vdf_post::SetupParams::<PedersenDomain, vdf_sloth::Sloth> {
challenge_count,
sector_size: size,
sector_size: size as u64,
post_epochs,
setup_params_vdf: vdf_sloth::SetupParams {
key: rng.gen(),
Expand Down
29 changes: 12 additions & 17 deletions filecoin-proofs/src/api/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ fn get_zigzag_params(porep_config: PoRepConfig) -> error::Result<Arc<groth16::Pa
Ok(lookup_groth_params(
format!(
"ZIGZAG[{}]",
usize::from(PaddedBytesAmount::from(porep_config))
u64::from(PaddedBytesAmount::from(porep_config))
),
get_params,
)?)
Expand All @@ -149,10 +149,7 @@ fn get_post_params(post_config: PoStConfig) -> error::Result<Arc<groth16::Parame
};

Ok(lookup_groth_params(
format!(
"POST[{}]",
usize::from(PaddedBytesAmount::from(post_config))
),
format!("POST[{}]", u64::from(PaddedBytesAmount::from(post_config))),
get_params,
)?)
}
Expand All @@ -169,7 +166,7 @@ fn get_zigzag_verifying_key(porep_config: PoRepConfig) -> error::Result<Arc<Bls1
Ok(lookup_verifying_key(
format!(
"ZIGZAG[{}]",
usize::from(PaddedBytesAmount::from(porep_config))
u64::from(PaddedBytesAmount::from(porep_config))
),
get_verifying_key,
)?)
Expand All @@ -188,10 +185,7 @@ fn get_post_verifying_key(post_config: PoStConfig) -> error::Result<Arc<Bls12Ver
};

Ok(lookup_verifying_key(
format!(
"POST[{}]",
usize::from(PaddedBytesAmount::from(post_config))
),
format!("POST[{}]", u64::from(PaddedBytesAmount::from(post_config))),
get_verifying_key,
)?)
}
Expand Down Expand Up @@ -236,7 +230,7 @@ fn setup_params(
sector_bytes: PaddedBytesAmount,
partitions: usize,
) -> layered_drgporep::SetupParams {
let sector_bytes = usize::from(sector_bytes);
let sector_bytes = u64::from(sector_bytes);

let challenges = select_challenges(
partitions,
Expand All @@ -251,7 +245,7 @@ fn setup_params(
"sector_bytes ({}) must be a multiple of 32",
sector_bytes,
);
let nodes = sector_bytes / 32;
let nodes = (sector_bytes / 32) as usize;
Copy link
Collaborator

@porcuquine porcuquine May 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With large enough sectors (> 128GiB), nodes will exceed u32 too.

Copy link
Contributor Author

@steven004 steven004 May 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not only nodes, and perhaps sector counts as well in some other files (when a big repo > 1EB, the sector number could > 4G), I would think it is better to do incremental changes in multiple commits, and make sure every step is correct.

How do you think about this? @porcuquine

layered_drgporep::SetupParams {
drg: DrgParams {
nodes,
Expand Down Expand Up @@ -525,7 +519,7 @@ pub fn seal<T: Into<PathBuf> + AsRef<Path>>(
prover_id_in: &FrSafe,
sector_id_in: &FrSafe,
) -> error::Result<SealOutput> {
let sector_bytes = usize::from(PaddedBytesAmount::from(porep_config));
let sector_bytes = u64::from(PaddedBytesAmount::from(porep_config));

let mut cleanup = FileCleanup::new(&out_path);

Expand All @@ -534,7 +528,7 @@ pub fn seal<T: Into<PathBuf> + AsRef<Path>>(
let f_data = OpenOptions::new().read(true).write(true).open(&out_path)?;

// Zero-pad the data to the requested size by extending the underlying file if needed.
f_data.set_len(sector_bytes as u64)?;
f_data.set_len(sector_bytes)?;

let mut data = unsafe { MmapOptions::new().map_mut(&f_data).unwrap() };

Expand Down Expand Up @@ -656,7 +650,8 @@ pub fn get_unsealed_range<T: Into<PathBuf> + AsRef<Path>>(
&unsealed,
&mut buf_writer,
offset as usize,
num_bytes.into(),
// num_bytes.into(),
u64::from(num_bytes) as usize,
)?;

Ok(UnpaddedBytesAmount(written as u64))
Expand Down Expand Up @@ -808,10 +803,10 @@ mod tests {

assert_eq!(
contents.len(),
usize::from(
u64::from(
mgr.write_and_preprocess(&staged_access, &mut file)
.expect("failed to write and preprocess")
)
) as usize
);

written_contents.push(contents);
Expand Down
29 changes: 16 additions & 13 deletions sector-base/src/api/bytes_amount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ impl From<UnpaddedBytesAmount> for u64 {
}
}

impl From<UnpaddedBytesAmount> for usize {
fn from(n: UnpaddedBytesAmount) -> Self {
n.0 as usize
}
}

// //Delete the implementation for usize, since it could bring potential issue on a 32-bit system
// // to fix #622
// impl From<UnpaddedBytesAmount> for usize {
// fn from(n: UnpaddedBytesAmount) -> Self {
// n.0 as usize
// }
// }

// This could potentially trigger some issues, when convert u64 to usize and reverse back.
// Todo: need to find where this function is called and what's the impact
impl From<UnpaddedBytesAmount> for PaddedBytesAmount {
fn from(n: UnpaddedBytesAmount) -> Self {
PaddedBytesAmount(padded_bytes(n.0 as usize) as u64)
Expand All @@ -37,11 +41,12 @@ impl From<PaddedBytesAmount> for u64 {
}
}

impl From<PaddedBytesAmount> for usize {
fn from(n: PaddedBytesAmount) -> Self {
n.0 as usize
}
}
// //Delete the implementation for usize, to fix issue #622
// impl From<PaddedBytesAmount> for usize {
// fn from(n: PaddedBytesAmount) -> Self {
// n.0 as usize
// }
// }

impl From<PaddedBytesAmount> for UnpaddedBytesAmount {
fn from(n: PaddedBytesAmount) -> Self {
Expand Down Expand Up @@ -120,9 +125,7 @@ mod tests {

// Coercion to primitives work
assert_eq!(1u64 + u64::from(b), 3u64);
assert_eq!(1usize + usize::from(b), 3usize);
assert_eq!(1u64 + u64::from(e), 3u64);
assert_eq!(1usize + usize::from(e), 3usize);

// But not between BytesAmount types
// assert_eq!(a + UnpaddedBytesAmount::from(e), c);
Expand Down
4 changes: 2 additions & 2 deletions sector-base/src/api/disk_backed_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ impl SectorManager for DiskManager {
file.seek(SeekFrom::Start(start_offset))
.map_err(|err| SectorManagerErr::CallerError(format!("{:?}", err)))?;

let mut buf = vec![0; usize::from(num_bytes)];
let mut buf = vec![0; (u64::from(num_bytes)) as usize];
Copy link
Collaborator

@porcuquine porcuquine May 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be missing something, but I think that if usize is u32 (on a 32-bit system), then this will truncate so isn't any better than converting UnpaddedBytesAmount to usize would have been.

In other words, I think this is fundamentally a matter of us wanting to address more than 32-bits worth of memory; and trying to get around it only indirects the issue.

It seems to me that the only simple way to avoid this is to avoid the situation. That's probably done most gracefully by putting a bounds check into the (removed above) conversion to usize. I imagine there's some clever way we could get around all this, but I suspect it would involve jumping through a lot of hoops — and at the end of the day, we'd still never want to try replicate too-large sectors on a 32-bit machine.

I'm starting to think the main solution to this is to never create sectors larger than 4GiB (max) if we are on a 32-bit system. We could, and perhaps should, check that when the SectorBuilder is requested in the first place. @dignifiedquire @laser

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is one of places I remain there for future decision. Originally, I put an TODO comment there in the code to reflect that as you mentioned, but, not sure why it could not pass the fmt check, then deleted it in the commit b7818d0

Copy link
Collaborator

@porcuquine porcuquine May 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's how I see it:

Code like the above, where we are creating/indexing vectors/slices must use usize — as in your cast above. This is enforced by the compiler (for good reason).

For now, there is no plan to try to support replication with less RAM than required to hold a sector — so even if we can incrementally remove some instances of allocation/reference that require 64 bits, we still won't have changed the fundamental fact that the system can't handle the situation.

I think we should defer rippling partial changes which create the illusion that we can address sectors larger than the architecture will allow. Instead, we should use usize internally in any place that may eventually propagate to an index variable or vector size.

If we really want to expose u64 as the official type, we can either:

  • Always fail on 32-bit machines (seems like a bad choice).
  • Perform a bounds check and fail on 32-bit machines when attempting to create a SectorBuilder handling sectors of > 4GiB.

Given the second option, we could also just use usize as the externally visible type (and use the appropriate FFI equivalent) — so that callers cannot pass values we won't be able to handle. That pushes the failure completely outside the FPS and 'makes illegal states unrepresentable'.

Copy link
Contributor Author

@steven004 steven004 May 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we really want to expose u64 as the official type, we can either:

  • Always fail on 32-bit machines (seems like a bad choice).
  • Perform a bounds check and fail on 32-bit machines when attempting to create a SectorBuilder handling sectors of > 4GiB.

Agreed, we could add the boundary check in places where u64 to usize functions for 32-bit system. Do you prefer we put it into this commit, or we could do it in the next commit?

I am thinking the best way is to ask compiler to do this, i.e. we have the code in place, and insert some compiler indicators to let the compiler decide if the code should be in or not in the binary depending on the system type (32-bit or 64-bit). I am not that familiar to add this kind of compiler indicator in rust, anyone has any suggestions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the second option, we could also just use usize as the externally visible type (and use the appropriate FFI equivalent) — so that callers cannot pass values we won't be able to handle. That pushes the failure completely outside the FPS and 'makes illegal states unrepresentable'.

Interesting. Should a mining rig operator who is running a 32-bit system be able to verify a PoSt or PoRep proof generated for sectors with size > 4GiB (by some other miner)? Both verify_post and verify_seal FFI-exposed functions have a sector_size: u64 parameter.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. There's no reason why 32-bit systems should be unable to verify large sectors. As long as sectors are not greater than 128GiB, we should still be able to represent nodes in 32 bits. So we don't need to propagate a change into graph, etc. We should (I think) coerce to usize as early as possible and put in a check at that point, failing (for the proving path) if this will overflow usize.

If and when we need to verify sectors over 128GiB, we will need to revisit this. I suggest we code this in such a way that we can't silently fail to do so in that event.


file.read_exact(buf.as_mut_slice())
.map_err(|err| SectorManagerErr::CallerError(format!("{:?}", err)))?;
Expand Down Expand Up @@ -315,7 +315,7 @@ pub mod tests {
let output_bytes_written = buf.len();

// ensure that we reported the correct number of written bytes
assert_eq!(contents.len(), usize::from(n));
assert_eq!(contents.len(), u64::from(n) as usize);

// ensure the file we wrote to contains the expected bytes
assert_eq!(contents[0..32], buf[0..32]);
Expand Down
1 change: 1 addition & 0 deletions sector-base/src/io/fr32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,7 @@ where
// offset and num_bytes are based on the unpadded data, so
// if [0, 1, ..., 255] was the original unpadded data, offset 3 and len 4 would return
// [3, 4, 5, 6].
// TODO: change the type of offset and len to u64, or limit this program only run on 64-bit system
pub fn write_unpadded<W: ?Sized>(
source: &[u8],
target: &mut W,
Expand Down
6 changes: 3 additions & 3 deletions storage-proofs/src/vdf_post.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub struct SetupParams<T: Domain, V: Vdf<T>> {
/// The number of challenges to be asked at each iteration.
pub challenge_count: usize,
/// Size of a sealed sector in bytes.
pub sector_size: usize,
pub sector_size: u64,
/// Number of times we repeat an online Proof-of-Replication in one single PoSt.
pub post_epochs: usize,
pub setup_params_vdf: V::SetupParams,
Expand All @@ -37,7 +37,7 @@ pub struct PublicParams<T: Domain, V: Vdf<T>> {
/// The number of challenges to be asked at each iteration.
pub challenge_count: usize,
/// Size of a sealed sector in bytes.
pub sector_size: usize,
pub sector_size: u64,
/// Number of times we repeat an online Proof-of-Replication in one single PoSt.
pub post_epochs: usize,
pub pub_params_vdf: V::PublicParams,
Expand Down Expand Up @@ -143,7 +143,7 @@ impl<'a, H: Hasher + 'a, V: Vdf<H::Domain>> ProofScheme<'a> for VDFPoSt<H, V> {
);
// Assuming well-formed (power of two) sector size, log2(sector_size) is given by number of trailing zeroes.
let log2 = sector_size.trailing_zeros();
let leaves = sector_size / 32;
let leaves = (sector_size / 32) as usize;
let challenge_bits = (log2 - 5) as usize;
assert_eq!(
2u64.pow(challenge_bits as u32),
Expand Down