From f8be11eaa1b71fa23ee17cd4440e37ff5a6f15de Mon Sep 17 00:00:00 2001 From: Victor Dumitrescu Date: Mon, 8 Jun 2026 10:55:30 +0200 Subject: [PATCH 1/2] test(dursto): add a standalone long tests binary --- Cargo.lock | 1 + durable-storage/Cargo.toml | 10 ++ durable-storage/Makefile | 1 + durable-storage/src/bin/database_long_test.rs | 95 +++++++++++++++++++ durable-storage/src/long_test/mod.rs | 77 ++++++++++++--- 5 files changed, 169 insertions(+), 15 deletions(-) create mode 100644 durable-storage/src/bin/database_long_test.rs diff --git a/Cargo.lock b/Cargo.lock index 23edba8af2..7ca9fd74ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2561,6 +2561,7 @@ dependencies = [ "bincode", "bytes", "cfg-if", + "clap", "criterion", "derive_more", "goldenfile", diff --git a/durable-storage/Cargo.toml b/durable-storage/Cargo.toml index 97c08ada9e..84ce6b0c5c 100644 --- a/durable-storage/Cargo.toml +++ b/durable-storage/Cargo.toml @@ -10,6 +10,7 @@ workspace = true [features] default = ["rocksdb"] unstable-test-utils = [ + "dep:clap", "dep:proptest", "dep:serde", "dep:serde_with", @@ -44,6 +45,10 @@ optional = true workspace = true optional = true +[dependencies.clap] +workspace = true +optional = true + [dependencies.serde_json] workspace = true optional = true @@ -72,6 +77,11 @@ octez-riscv-test-utils.workspace = true serde.workspace = true serde_json.workspace = true +[[bin]] +name = "database_long_test" +path = "src/bin/database_long_test.rs" +required-features = ["rocksdb", "unstable-test-utils"] + [[bench]] name = "avl_tree" harness = false diff --git a/durable-storage/Makefile b/durable-storage/Makefile index 4115becb10..ed96cce89b 100644 --- a/durable-storage/Makefile +++ b/durable-storage/Makefile @@ -8,6 +8,7 @@ all: check test check: @cargo clippy --all-targets --no-default-features -- --deny warnings @cargo clippy -p xtask --all-targets -- --deny warnings + @cargo clippy --features unstable-test-utils --bin database_long_test -- --deny warnings # distinct from the workspace-level test in that it actually turns off # rocksdb: using the in-memory implementation instead diff --git a/durable-storage/src/bin/database_long_test.rs b/durable-storage/src/bin/database_long_test.rs new file mode 100644 index 0000000000..64baa71ba6 --- /dev/null +++ b/durable-storage/src/bin/database_long_test.rs @@ -0,0 +1,95 @@ +// SPDX-FileCopyrightText: 2026 Nomadic Labs +// +// SPDX-License-Identifier: MIT + +//! Binary for the long-running durable storage [`Database`] test. +//! +//! See [`octez_riscv_durable_storage::long_test`] for details +//! about long tests. +//! +//! [`Database`]: octez_riscv_durable_storage::database::Database + +use std::path::PathBuf; +use std::time::Duration; + +use anyhow::Context; +use anyhow::Result; +use clap::Parser; +use clap::Subcommand; +use octez_riscv_data::hash::Hash; +use octez_riscv_durable_storage::long_test::LongTestConfig; +use octez_riscv_durable_storage::long_test::replay_failure; +use octez_riscv_durable_storage::long_test::run_long_test; + +#[derive(Debug, Parser)] +#[command(version, long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Debug, Subcommand)] +enum Commands { + /// Run the long test. + Test { + /// Target number of operations sampled per epoch. + #[arg(long, default_value_t = 1000)] + ops_per_epoch: usize, + + /// Number of test cases per epoch. + #[arg(long, default_value_t = 256)] + cases_per_epoch: u32, + + /// 32-byte hex-encoded test seed (default: run with a fresh seed, printed at startup). + #[arg(long)] + seed: Option, + + /// Time budget in minutes (default: none). + #[arg(long)] + max_minutes: Option, + + /// Maximum number of epochs to run (default: run until the time budget). + #[arg(long)] + epochs: Option, + }, + /// Replay the failing epoch described by `/meta.json`. + Replay { + /// Failure directory containing the recorded artifacts. + dir: PathBuf, + }, +} + +fn main() -> Result<()> { + match Cli::parse().command { + Commands::Test { + ops_per_epoch, + cases_per_epoch, + seed, + max_minutes, + epochs, + } => { + let seed = match seed { + Some(seed) => { + let bytes = hex::decode(&seed).context("decoding hex seed")?; + let array: [u8; Hash::DIGEST_SIZE] = + bytes.as_slice().try_into().map_err(|_| { + anyhow::anyhow!("seed must be exactly 32 bytes ({} given)", bytes.len()) + })?; + Some(Hash::from(array)) + } + None => None, + }; + + let config = LongTestConfig { + ops_per_epoch, + cases_per_epoch, + seed, + time_budget: max_minutes.map(|m| Duration::from_secs(m * 60)), + epochs, + }; + + run_long_test(config) + } + Commands::Replay { dir } => replay_failure(&dir), + } +} diff --git a/durable-storage/src/long_test/mod.rs b/durable-storage/src/long_test/mod.rs index edb3b1ff03..729b20dd9e 100644 --- a/durable-storage/src/long_test/mod.rs +++ b/durable-storage/src/long_test/mod.rs @@ -21,7 +21,6 @@ pub mod strategy; use std::fs; use std::path::Path; -use std::path::PathBuf; use std::time::Duration; use std::time::Instant; @@ -69,8 +68,6 @@ pub struct LongTestConfig { pub seed: Option, /// Time budget. The loop stops cleanly once exceeded. pub time_budget: Option, - /// If set, replay the failing epoch described by `/meta.json`. - pub replay: Option, } /// Metadata persisted alongside a failure which enables replaying it. @@ -94,11 +91,6 @@ struct FailureMeta { /// Run the long-running test pub fn run_long_test(config: LongTestConfig) -> Result<()> { - // Replay reconstructs only the failing epoch; it is handled separately. - if let Some(replay_dir) = &config.replay { - return replay_failure(replay_dir); - } - let seed = config .seed .unwrap_or_else(|| rand::random::<[u8; 32]>().into()); @@ -110,9 +102,19 @@ pub fn run_long_test(config: LongTestConfig) -> Result<()> { .tempdir()? .keep(); - eprintln!("test seed: {seed}"); + let mut rerun = format!( + "cargo run --release --features rocksdb,unstable-test-utils --bin database_long_test -- \ + test --seed {seed} --ops-per-epoch {ops_per_epoch} --cases-per-epoch {cases_per_epoch}" + ); + if let Some(epochs) = max_epochs { + rerun.push_str(&format!(" --epochs {epochs}")); + } + if let Some(budget) = config.time_budget { + rerun.push_str(&format!(" --max-minutes {}", budget.as_secs() / 60)); + } eprintln!( - "out-dir: {} | ops/epoch: {ops_per_epoch} | cases/epoch: {cases_per_epoch}", + "test directory: {} | ops/epoch: {ops_per_epoch} | cases/epoch: {cases_per_epoch}\n\ + rerun with:\n{rerun}", out_dir.display(), ); @@ -171,6 +173,19 @@ pub fn run_long_test(config: LongTestConfig) -> Result<()> { match result { Ok(()) => { + // Size reporting only via the binary, not the crate test. + #[cfg(not(test))] + { + let snapshot_dir = persistent_repo.database_commit_dir(&base.commit); + let snapshot_size = dir_size(&snapshot_dir) + .context("measuring the size of the latest snapshot")?; + eprintln!( + "epoch {epoch} ok ({} keys, latest snapshot: {:.2} MiB)", + base.model.data.len(), + snapshot_size as f64 / (1024.0 * 1024.0), + ); + } + #[cfg(test)] eprintln!( "epoch {epoch} ok (db contains {} entries)", base.model.data.len() @@ -208,9 +223,38 @@ pub fn run_long_test(config: LongTestConfig) -> Result<()> { } eprintln!("completed {epoch} epochs"); + + // Size reporting only via the binary, not the crate test. + #[cfg(not(test))] + { + drop(runtime); + + let repo_size = dir_size(&repo_dir).context("measuring the size of the repo")?; + eprintln!( + "total repo size: {:.2} MiB", + repo_size as f64 / (1024.0 * 1024.0) + ); + } + Ok(()) } +/// Total size in bytes of all files under `dir`, recursively. +#[cfg(not(test))] +fn dir_size(dir: &Path) -> std::io::Result { + let mut size = 0; + for entry in fs::read_dir(dir)? { + let entry = entry?; + let metadata = entry.metadata()?; + if metadata.is_dir() { + size += dir_size(&entry.path())?; + } else { + size += metadata.len(); + } + } + Ok(size) +} + /// Build a deterministically seeded test runner for `epoch`. fn epoch_runner(seed: Hash, epoch: u64, cases: u32) -> TestRunner { // XOR the epoch index into the seed so each epoch has a distinct yet @@ -274,9 +318,11 @@ fn write_failure( .context("writing the persistent base snapshot")?; eprintln!( - "failure artifacts written to {}; replay with --replay {}", - failure_dir.display(), - failure_dir.display(), + "failure artifacts written to {failure}\n\ + replay with:\n\ + cargo run --release \ + --features rocksdb,unstable-test-utils --bin database_long_test -- replay {failure}", + failure = failure_dir.display(), ); Ok(()) } @@ -284,7 +330,7 @@ fn write_failure( /// Reproduce a recorded failure by reconstructing only the failing epoch. /// Both the persistence backend's base and the in-memory backend's base /// are restored from disk, and the saved (shrunk) operation sequence is applied once. -fn replay_failure(dir: &Path) -> Result<()> { +pub fn replay_failure(dir: &Path) -> Result<()> { fn read_failure_file( failure_dir: &Path, name: &str, @@ -374,6 +420,8 @@ fn replay_failure(dir: &Path) -> Result<()> { #[cfg(test)] mod tests { + use std::path::PathBuf; + use bytes::Bytes; use octez_riscv_test_utils::TestableTmpdir; use tokio::runtime::Runtime; @@ -390,7 +438,6 @@ mod tests { cases_per_epoch: 32, seed: None, time_budget: None, - replay: None, }) .expect("the short long test run should succeed"); } From 7325f47a09dbed8e5151474cd29878c0f2a47872 Mon Sep 17 00:00:00 2001 From: Victor Dumitrescu Date: Mon, 8 Jun 2026 15:31:01 +0200 Subject: [PATCH 2/2] feat(ci): add a PR job which runs the long tests --- .github/workflows/ci.yml | 3 +++ durable-storage/Makefile | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 73d1a80afe..6f8379a2b7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,9 @@ jobs: - runs-on: macos-latest make-target: check title: MacOS Checks + - runs-on: ubuntu-latest + make-target: durable/long-test + title: Durable Storage Long Tests name: ${{ matrix.title }} runs-on: ${{ matrix.runs-on }} diff --git a/durable-storage/Makefile b/durable-storage/Makefile index ed96cce89b..3f032c984d 100644 --- a/durable-storage/Makefile +++ b/durable-storage/Makefile @@ -20,4 +20,8 @@ reset-regressions: @cargo run -p xtask -- gen-database-regression-inputs @UPDATE_GOLDENFILES=1 cargo nextest run test_database_regression -.PHONY: all check test gen-regression-inputs +long-test: + @cargo run --release --features rocksdb,unstable-test-utils \ + --bin database_long_test -- test --max-minutes 10 + +.PHONY: all check test gen-regression-inputs long-test