Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 123 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ DEBUG_ASSERTIONS=RUSTFLAGS="-C debug-assertions"
FEATURES_CONCURRENT_EXEC=--features concurrent,executable
FEATURES_LOG_TREE=--features concurrent,executable,tracing-forest
FEATURES_METAL_EXEC=--features concurrent,executable,metal
ALL_FEATURES_BUT_ASYNC=--features concurrent,executable,metal,testing,with-debug-info,internal
FEATURES_CUDA_EXEC=--features concurrent,executable,cuda
ALL_FEATURES_BUT_ASYNC=--features concurrent,executable,metal,cuda,testing,with-debug-info,internal

# -- linting --------------------------------------------------------------------------------------

Expand Down Expand Up @@ -100,6 +101,10 @@ exec-single: ## Builds a single-threaded executable
exec-metal: ## Builds an executable with Metal acceleration enabled
cargo build --profile optimized $(FEATURES_METAL_EXEC)

# Builds with the CUDA feature set. The recipe also passes `-C target-feature=+avx2`, the same
# RUSTFLAGS used by `exec-avx2`. NOTE(review): this presumes an AVX2-capable x86_64 host — confirm.
.PHONY: exec-cuda
exec-cuda: ## Builds an executable with CUDA acceleration enabled
RUSTFLAGS="-C target-feature=+avx2" cargo build --profile optimized $(FEATURES_CUDA_EXEC)

.PHONY: exec-avx2
exec-avx2: ## Builds an executable with AVX2 acceleration enabled
RUSTFLAGS="-C target-feature=+avx2" cargo build --profile optimized $(FEATURES_CONCURRENT_EXEC)
Expand Down
17 changes: 17 additions & 0 deletions air/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,23 @@ impl ProvingOptions {
self
}

/// Configures the number of trace partitions used by this [ProvingOptions].
///
/// Splitting the trace into partitions lets the prover distribute work across multiple
/// devices during proving; the number of partitions should match the number of devices.
pub const fn with_partitions(mut self, num_partitions: usize) -> Self {
    // Every hash function supported today consumes 8 felts per iteration. The match is kept
    // exhaustive (no wildcard arm) so that adding a new hash function forces this rate to be
    // revisited.
    let felts_per_iter = match self.hash_fn {
        HashFunction::Blake3_192
        | HashFunction::Blake3_256
        | HashFunction::Rpo256
        | HashFunction::Rpx256 => 8,
    };
    self.proof_options = self.proof_options.with_partitions(num_partitions, felts_per_iter);
    self
}

// PUBLIC ACCESSORS
// --------------------------------------------------------------------------------------------

Expand Down
12 changes: 10 additions & 2 deletions docs/src/intro/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,20 @@ Internally, Miden VM uses [rayon](https://github.com/rayon-rs/rayon) for paralle

### GPU acceleration

Miden VM proof generation can be accelerated via GPUs. Currently, GPU acceleration is enabled only on Apple Silicon hardware (via [Metal](<https://en.wikipedia.org/wiki/Metal_(API)>)). To compile Miden VM with Metal acceleration enabled, you can run the following command:
Miden VM proof generation can be accelerated via GPUs. Currently, GPU acceleration is enabled on Apple Silicon hardware (via [Metal](<https://en.wikipedia.org/wiki/Metal_(API)>)) and NVIDIA GPUs (via [CUDA](https://en.wikipedia.org/wiki/CUDA)).

To compile Miden VM with Metal acceleration enabled, you can run the following command:

```shell
make exec-metal
```

To compile Miden VM with CUDA acceleration enabled, you can run the following command:

```shell
make exec-cuda
```

Similar to the `make exec` command, this will place the resulting `miden` executable into the `./target/optimized` directory.

Currently, GPU acceleration is applicable only to recursive proofs which can be generated using the `-r` flag.
Expand All @@ -66,7 +74,7 @@ make exec-sve

This will place the resulting `miden` executable into the `./target/optimized` directory.

Similar to Metal acceleration, SVE/AVX2 acceleration is currently applicable only to recursive proofs which can be generated using the `-r` flag.
Similar to GPU acceleration, SVE/AVX2 acceleration is currently applicable only to recursive proofs which can be generated using the `-r` flag.

### Running Miden VM

Expand Down
1 change: 1 addition & 0 deletions miden/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ executable = [
"dep:rustyline",
"dep:tracing-subscriber",
]
cuda = ["prover/cuda", "std"]
metal = ["prover/metal", "std"]
std = ["assembly/std", "processor/std", "prover/std", "verifier/std"]
# For internal use, not meant to be used by users
Expand Down
3 changes: 3 additions & 0 deletions miden/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ make exec
# build an executable for Apple silicon (concurrent+metal)
make exec-metal

# build an executable for NVIDIA GPUs (concurrent+cuda)
make exec-cuda

# build an executable for targets with AVX2 instructions (concurrent)
make exec-avx2

Expand Down
20 changes: 15 additions & 5 deletions miden/src/cli/prove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ use assembly::diagnostics::{IntoDiagnostic, Report, WrapErr};
use clap::Parser;
use miden_vm::{internal::InputFile, ProvingOptions};
use processor::{DefaultHost, ExecutionOptions, ExecutionOptionsError, Program};
#[cfg(all(target_arch = "x86_64", feature = "cuda"))]
use prover::cuda::get_num_of_gpus;
use prover::Prover;
use stdlib::StdLibrary;
use tracing::instrument;

Expand Down Expand Up @@ -65,6 +68,11 @@ impl ProveCmd {
pub fn get_proof_options(&self) -> Result<ProvingOptions, ExecutionOptionsError> {
let exec_options =
ExecutionOptions::new(Some(self.max_cycles), self.expected_cycles, self.trace, false)?;

let partitions = 1;
#[cfg(all(target_arch = "x86_64", feature = "cuda"))]
let partitions = get_num_of_gpus();

Ok(match self.security.as_str() {
"96bits" => {
if self.rpx {
Expand All @@ -82,7 +90,8 @@ impl ProveCmd {
},
other => panic!("{} is not a valid security setting", other),
}
.with_execution_options(exec_options))
.with_execution_options(exec_options)
.with_partitions(partitions))
}

pub fn execute(&self) -> Result<(), Report> {
Expand All @@ -105,10 +114,11 @@ impl ProveCmd {
self.get_proof_options().map_err(|err| Report::msg(format!("{err}")))?;

// execute program and generate proof
let (stack_outputs, proof) =
prover::prove(&program, stack_inputs, &mut host, proving_options)
.into_diagnostic()
.wrap_err("Failed to prove program")?;
let mut prover = Prover::new();
let (stack_outputs, proof) = prover
.prove(&program, stack_inputs, &mut host, proving_options)
.into_diagnostic()
.wrap_err("Failed to prove program")?;

println!(
"Program with hash {} proved in {} ms",
Expand Down
2 changes: 1 addition & 1 deletion miden/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub use processor::{
ProgramInfo, StackInputs, VmState, VmStateIterator, ZERO,
};
pub use prover::{
math, prove, Digest, ExecutionProof, FieldExtension, HashFunction, InputError, Proof,
math, Digest, ExecutionProof, FieldExtension, HashFunction, InputError, Proof, Prover,
ProvingOptions, StackOutputs, Word,
};
pub use verifier::{verify, VerificationError};
Expand Down
2 changes: 2 additions & 0 deletions processor/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ pub enum ExecutionError {
NoMastForestWithProcedure { root_digest: Digest },
#[error("memory address cannot exceed 2^32 but was {0}")]
MemoryAddressOutOfBounds(u64),
#[error("VM exceeded the memory usage limit {0}")]
MemoryLimitExceeded(usize),
#[error(
"word memory access at address {addr} in context {ctx} is unaligned at clock cycle {clk}"
)]
Expand Down
Loading