-
Notifications
You must be signed in to change notification settings - Fork 108
feat: add blob encoding benchmark and profiling harness #3085
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAICT, many of the sub-benchmarks here mostly copy some code from |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,248 @@ | ||
| // Copyright (c) Walrus Foundation | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| // Allowing `unwrap`s in benchmarks. | ||
| #![allow(clippy::unwrap_used)] | ||
|
|
||
| //! Phase-level benchmarks for the blob encoding pipeline at production parameters (n_shards=1000). | ||
| //! | ||
| //! Complements `blob_encoding.rs` (end-to-end) and `basic_encoding.rs` (individual RS/merkle ops) | ||
| //! by measuring each phase of `encode_with_metadata()` independently. | ||
|
|
||
| use core::{num::NonZeroU16, time::Duration}; | ||
|
|
||
| use criterion::{AxisScale, BenchmarkId, Criterion, PlotConfiguration}; | ||
| use fastcrypto::hash::Blake2b256; | ||
| use walrus_core::{ | ||
| encoding::{EncodingFactory as _, ReedSolomonEncoder, ReedSolomonEncodingConfig}, | ||
| merkle::{MerkleTree, Node, leaf_hash}, | ||
| }; | ||
| use walrus_test_utils::random_data; | ||
|
|
||
| const N_SHARDS: u16 = 1000; | ||
|
|
||
| const BLOB_SIZES: &[(u64, &str)] = &[(1 << 20, "1MiB"), (1 << 25, "32MiB"), (1 << 28, "256MiB")]; | ||
|
|
||
| fn encoding_config() -> ReedSolomonEncodingConfig { | ||
| ReedSolomonEncodingConfig::new(NonZeroU16::new(N_SHARDS).unwrap()) | ||
| } | ||
|
|
||
| /// Benchmark secondary encoding: RS-encode each row to produce repair secondary slivers. | ||
| fn secondary_encoding(c: &mut Criterion) { | ||
| let config = encoding_config(); | ||
| let mut group = c.benchmark_group("secondary_encoding"); | ||
| group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); | ||
|
|
||
| for (blob_size, size_str) in BLOB_SIZES { | ||
| let blob = random_data((*blob_size).try_into().unwrap()); | ||
| group.throughput(criterion::Throughput::Bytes(*blob_size)); | ||
|
|
||
| let encoder = config.get_blob_encoder(&blob).unwrap(); | ||
| let symbol_size = encoder.symbol_usize(); | ||
| let n_rows: usize = config.n_primary_source_symbols().get().into(); | ||
| let n_cols: usize = config.n_secondary_source_symbols().get().into(); | ||
| let row_len = n_cols * symbol_size; | ||
|
|
||
| // Build row data (same layout as primary slivers' data). | ||
| let mut rows: Vec<Vec<u8>> = Vec::with_capacity(n_rows); | ||
| for r in 0..n_rows { | ||
| let start = r * row_len; | ||
| let end = (start + row_len).min(blob.len()); | ||
| let mut row = vec![0u8; row_len]; | ||
| if start < blob.len() { | ||
| row[..end - start].copy_from_slice(&blob[start..end]); | ||
| } | ||
| rows.push(row); | ||
| } | ||
|
|
||
| group.bench_with_input( | ||
| BenchmarkId::new("encode_rows", size_str), | ||
| &rows, | ||
| |b, rows| { | ||
| b.iter(|| { | ||
| let mut enc = ReedSolomonEncoder::new( | ||
| NonZeroU16::new(symbol_size.try_into().unwrap()).unwrap(), | ||
| config.n_secondary_source_symbols(), | ||
| NonZeroU16::new(N_SHARDS).unwrap(), | ||
| ) | ||
| .unwrap(); | ||
| for row in rows { | ||
| let _ = enc.encode_all_repair_symbols(row); | ||
| } | ||
| }); | ||
| }, | ||
| ); | ||
| } | ||
|
|
||
| group.finish(); | ||
| } | ||
|
|
||
| /// Benchmark primary encoding: RS-encode each column to produce all primary symbols. | ||
| fn primary_encoding(c: &mut Criterion) { | ||
| let config = encoding_config(); | ||
| let mut group = c.benchmark_group("primary_encoding"); | ||
| group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); | ||
|
|
||
| for (blob_size, size_str) in BLOB_SIZES { | ||
| let blob = random_data((*blob_size).try_into().unwrap()); | ||
| group.throughput(criterion::Throughput::Bytes(*blob_size)); | ||
|
|
||
| let encoder = config.get_blob_encoder(&blob).unwrap(); | ||
| let symbol_size = encoder.symbol_usize(); | ||
| let n_shards: usize = N_SHARDS.into(); | ||
| let n_rows: usize = config.n_primary_source_symbols().get().into(); | ||
|
|
||
| // Build column data (each column = one secondary sliver's symbols data). | ||
| let col_len = n_rows * symbol_size; | ||
| let columns: Vec<Vec<u8>> = (0..n_shards).map(|_| random_data(col_len)).collect(); | ||
|
|
||
| group.bench_with_input( | ||
| BenchmarkId::new("encode_columns", size_str), | ||
| &columns, | ||
| |b, columns| { | ||
| b.iter(|| { | ||
| let mut enc = ReedSolomonEncoder::new( | ||
| NonZeroU16::new(symbol_size.try_into().unwrap()).unwrap(), | ||
| config.n_primary_source_symbols(), | ||
| NonZeroU16::new(N_SHARDS).unwrap(), | ||
| ) | ||
| .unwrap(); | ||
| for col in columns { | ||
| let _ = enc.encode_all_ref(col); | ||
| } | ||
| }); | ||
| }, | ||
| ); | ||
| } | ||
|
|
||
| group.finish(); | ||
| } | ||
|
|
||
| /// Benchmark primary encoding + leaf hashing of each symbol. | ||
| fn primary_encoding_with_hashing(c: &mut Criterion) { | ||
| let config = encoding_config(); | ||
| let mut group = c.benchmark_group("primary_encoding_with_hashing"); | ||
| group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); | ||
|
|
||
| for (blob_size, size_str) in BLOB_SIZES { | ||
| let blob = random_data((*blob_size).try_into().unwrap()); | ||
| group.throughput(criterion::Throughput::Bytes(*blob_size)); | ||
|
|
||
| let encoder = config.get_blob_encoder(&blob).unwrap(); | ||
| let symbol_size = encoder.symbol_usize(); | ||
| let n_shards: usize = N_SHARDS.into(); | ||
| let n_rows: usize = config.n_primary_source_symbols().get().into(); | ||
|
|
||
| let col_len = n_rows * symbol_size; | ||
| let columns: Vec<Vec<u8>> = (0..n_shards).map(|_| random_data(col_len)).collect(); | ||
|
|
||
| group.bench_with_input( | ||
| BenchmarkId::new("encode_columns_and_hash", size_str), | ||
| &columns, | ||
| |b, columns| { | ||
| b.iter(|| { | ||
| let mut enc = ReedSolomonEncoder::new( | ||
| NonZeroU16::new(symbol_size.try_into().unwrap()).unwrap(), | ||
| config.n_primary_source_symbols(), | ||
| NonZeroU16::new(N_SHARDS).unwrap(), | ||
| ) | ||
| .unwrap(); | ||
| let mut hashes = vec![Node::Empty; n_shards * n_shards]; | ||
| for (col_index, col) in columns.iter().enumerate() { | ||
| let symbols = enc.encode_all_ref(col).unwrap(); | ||
| for (row_index, symbol) in symbols.to_symbols().enumerate() { | ||
| hashes[n_shards * row_index + col_index] = | ||
| leaf_hash::<Blake2b256>(symbol); | ||
| } | ||
| } | ||
| }); | ||
| }, | ||
| ); | ||
| } | ||
|
|
||
| group.finish(); | ||
| } | ||
|
|
||
| /// Benchmark metadata computation from pre-computed symbol hashes (Merkle tree construction). | ||
| fn metadata_from_hashes(c: &mut Criterion) { | ||
| let mut group = c.benchmark_group("metadata_from_hashes"); | ||
| group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); | ||
|
|
||
| for (blob_size, size_str) in BLOB_SIZES { | ||
| group.throughput(criterion::Throughput::Bytes(*blob_size)); | ||
|
|
||
| let n_shards: usize = N_SHARDS.into(); | ||
| // Pre-compute random hashes to isolate metadata construction time. | ||
| let hashes: Vec<Node> = (0..n_shards * n_shards) | ||
| .map(|i| { | ||
| let data = i.to_le_bytes(); | ||
| leaf_hash::<Blake2b256>(&data) | ||
| }) | ||
| .collect(); | ||
|
|
||
| group.bench_with_input( | ||
| BenchmarkId::new("build_merkle_trees", size_str), | ||
| &hashes, | ||
| |b, hashes| { | ||
| b.iter(|| { | ||
| // Build 2 * n_shards Merkle trees (primary + secondary per sliver pair). | ||
| for sliver_index in 0..n_shards { | ||
| let _primary = MerkleTree::<Blake2b256>::build_from_leaf_hashes( | ||
| hashes[n_shards * sliver_index..n_shards * (sliver_index + 1)] | ||
| .iter() | ||
| .cloned(), | ||
| ); | ||
| let _secondary = MerkleTree::<Blake2b256>::build_from_leaf_hashes( | ||
| (0..n_shards).map(|symbol_index| { | ||
| hashes[n_shards * symbol_index + n_shards - 1 - sliver_index] | ||
| .clone() | ||
| }), | ||
| ); | ||
| } | ||
| }); | ||
| }, | ||
| ); | ||
| } | ||
|
|
||
| group.finish(); | ||
| } | ||
|
|
||
| /// Benchmark the full `encode_with_metadata()` pipeline for comparison. | ||
| fn full_pipeline(c: &mut Criterion) { | ||
| let config = encoding_config(); | ||
| let mut group = c.benchmark_group("full_pipeline"); | ||
| group.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic)); | ||
|
|
||
| for (blob_size, size_str) in BLOB_SIZES { | ||
| let blob = random_data((*blob_size).try_into().unwrap()); | ||
| group.throughput(criterion::Throughput::Bytes(*blob_size)); | ||
|
|
||
| group.bench_with_input( | ||
| BenchmarkId::new("encode_with_metadata", size_str), | ||
| &blob, | ||
| |b, blob| { | ||
| b.iter(|| { | ||
| let encoder = config.get_blob_encoder(blob).unwrap(); | ||
| let _result = encoder.encode_with_metadata(); | ||
| }); | ||
| }, | ||
| ); | ||
| } | ||
|
|
||
| group.finish(); | ||
| } | ||
|
|
||
| fn main() { | ||
| let mut criterion = Criterion::default() | ||
| .configure_from_args() | ||
| .sample_size(10) | ||
| .warm_up_time(Duration::from_millis(10)); | ||
|
|
||
| secondary_encoding(&mut criterion); | ||
| primary_encoding(&mut criterion); | ||
| primary_encoding_with_hashing(&mut criterion); | ||
| metadata_from_hashes(&mut criterion); | ||
| full_pipeline(&mut criterion); | ||
|
|
||
| criterion.final_summary(); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question: What is the relationship to the existing benchmarks? Can we combine them?