From b9d748ddf7cba2f08774523f55f241fe86eaa1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Mon, 20 Nov 2023 15:16:38 +0700 Subject: [PATCH 1/4] Bump randomx-rs, don't override Mac SDK with old 12.3 --- .github/workflows/ci.yml | 16 ---------------- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b1d25dad..1727bd62 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -67,26 +67,17 @@ jobs: run: cargo test --all-features --release env: RUSTFLAGS: ${{ matrix.rustflags }} - # https://github.com/tevador/RandomX/issues/262 - # https://github.com/tari-project/randomx-rs/issues/48 - SDKROOT: "/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" - name: Test scrypt-ocl crate if: matrix.os != 'windows-2019' run: cargo test -p scrypt-ocl --all-features --release -- --test-threads=1 env: RUSTFLAGS: ${{ matrix.rustflags }} - # https://github.com/tevador/RandomX/issues/262 - # https://github.com/tari-project/randomx-rs/issues/48 - SDKROOT: "/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" - name: Test ffi crate run: cargo test -p post-cbindings --all-features --release -- --test-threads=1 env: RUSTFLAGS: ${{ matrix.rustflags }} - # https://github.com/tevador/RandomX/issues/262 - # https://github.com/tari-project/randomx-rs/issues/48 - SDKROOT: "/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" fmt: name: Rustfmt @@ -218,9 +209,6 @@ jobs: run: cargo build -p post-cbindings --profile release-clib env: RUSTFLAGS: ${{ matrix.rustflags }} - # https://github.com/tevador/RandomX/issues/262 - # https://github.com/tari-project/randomx-rs/issues/48 - SDKROOT: "/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" - name: Archive production artifacts uses: actions/upload-artifact@v3 with: @@ -235,9 +223,6 @@ jobs: run: cargo build -p profiler --release env: RUSTFLAGS: ${{ matrix.rustflags }} - # https://github.com/tevador/RandomX/issues/262 - # https://github.com/tari-project/randomx-rs/issues/48 - SDKROOT: "/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" - name: Archive profiler artifacts uses: actions/upload-artifact@v3 with: @@ -253,7 +238,6 @@ jobs: run: cargo build -p service --release env: RUSTFLAGS: ${{ matrix.rustflags }} - SDKROOT: "/Library/Developer/CommandLineTools/SDKs/MacOSX12.3.sdk" - name: Archive service artifacts uses: actions/upload-artifact@v3 with: diff --git a/Cargo.lock b/Cargo.lock index 5f366808..acc84438 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2643,8 +2643,8 @@ dependencies = [ [[package]] name = "randomx-rs" -version = "1.1.15" -source = "git+https://github.com/spacemeshos/randomx-rs?rev=6f2bf32af7219a5f9ae929c3020242ecc7c6dd6a#6f2bf32af7219a5f9ae929c3020242ecc7c6dd6a" +version = "1.3.0" +source = "git+https://github.com/spacemeshos/randomx-rs?rev=d46bcd90e09428883e253b8203d6b311b0a07b91#d46bcd90e09428883e253b8203d6b311b0a07b91" dependencies = [ "bitflags 2.4.0", "libc", diff --git a/Cargo.toml b/Cargo.toml index 6872c271..a66d04e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ bitvec = "1.0.1" rayon = "1.6.1" rand = "0.8.5" log = "0.4.17" -randomx-rs = { git = "https://github.com/spacemeshos/randomx-rs", rev = "6f2bf32af7219a5f9ae929c3020242ecc7c6dd6a" } +randomx-rs = { git = "https://github.com/spacemeshos/randomx-rs", rev = "d46bcd90e09428883e253b8203d6b311b0a07b91" } primitive-types = "0.12.1" From 2183ebc4005f8c586115c012ac252d695f72efc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Tue, 21 Nov 2023 15:00:19 +0700 Subject: [PATCH 2/4] Add --max-retries to the post service --- service/src/client.rs | 30 ++++++++++++++---------- service/src/main.rs | 45 +++++++++++++++++++++++++++++------- service/tests/server/mod.rs | 8 +------ service/tests/test_client.rs | 12 +++++----- 4 files changed, 62 insertions(+), 33 deletions(-) diff --git a/service/src/client.rs b/service/src/client.rs index efb68879..af4923e9 100644 --- a/service/src/client.rs +++ b/service/src/client.rs @@ -30,7 +30,6 @@ pub mod spacemesh_v1 { pub struct ServiceClient { endpoint: Endpoint, - reconnect_interval: Duration, service: S, } @@ -69,7 +68,6 @@ impl PostService for std::sync::Arc { impl ServiceClient { pub fn new( address: String, - reconnect_interval: Duration, tls: Option<(Option, Certificate, Identity)>, service: S, ) -> eyre::Result { @@ -96,28 +94,37 @@ impl ServiceClient { None => endpoint, }; - Ok(Self { - endpoint, - reconnect_interval, - service, - }) + Ok(Self { endpoint, service }) } - pub async fn run(mut self) -> eyre::Result<()> { + pub async fn run( + mut self, + max_retries: Option, + reconnect_interval: Duration, + ) -> eyre::Result<()> { loop { + let mut attempt = 1; let client = loop { - log::debug!("connecting to the node on {}", self.endpoint.uri()); + log::debug!( + "connecting to the node on {} (attempt {})", + self.endpoint.uri(), + attempt + ); match PostServiceClient::connect(self.endpoint.clone()).await { Ok(client) => break client, Err(e) => { log::info!("could not connect to the node: {e}"); - sleep(self.reconnect_interval).await; + if let Some(max) = max_retries { + eyre::ensure!(attempt <= max, "max retries ({max}) reached"); + } + sleep(reconnect_interval).await; } } + attempt += 1; }; let res = self.register_and_serve(client).await; log::info!("disconnected: {res:?}"); - sleep(self.reconnect_interval).await; + sleep(reconnect_interval).await; } } @@ -275,7 +282,6 @@ mod tests { let client_crt = rcgen::generate_simple_self_signed(vec!["localhost".into()]).unwrap(); super::ServiceClient::new( "https://localhost:1234".to_string(), - Default::default(), Some(( None, Certificate::from_pem(crt.serialize_pem().unwrap()), diff --git a/service/src/main.rs b/service/src/main.rs index 7f59c41b..e0227f0f 100644 --- a/service/src/main.rs +++ b/service/src/main.rs @@ -21,6 +21,10 @@ struct Cli { /// time to wait before reconnecting to the node #[arg(long, default_value = "5", value_parser = |secs: &str| secs.parse().map(Duration::from_secs))] reconnect_interval_s: Duration, + /// Maximum number of retries to connect to the node + /// The default is infinite. + #[arg(long)] + max_retries: Option, #[command(flatten, next_help_heading = "POST configuration")] post_config: PostConfig, @@ -219,14 +223,28 @@ async fn main() -> eyre::Result<()> { None }; - let client = client::ServiceClient::new(args.address, args.reconnect_interval_s, tls, service)?; - let client_handle = tokio::spawn(client.run()); + let client = client::ServiceClient::new(args.address, tls, service)?; + let client_handle = tokio::spawn(client.run(args.max_retries, args.reconnect_interval_s)); - if let Some(pid) = args.watch_pid { - tokio::task::spawn_blocking(move || watch_pid(pid, Duration::from_secs(1))).await?; - Ok(()) - } else { - client_handle.await? + tokio::select! { + Some(err) = watch_pid_if_needed(args.watch_pid) => { + log::info!("PID watcher exited: {err:?}"); + return Ok(()) + } + err = client_handle => { + return err.unwrap(); + } + } +} + +async fn watch_pid_if_needed( + pid: Option, +) -> Option> { + match pid { + Some(pid) => { + Some(tokio::task::spawn_blocking(move || watch_pid(pid, Duration::from_secs(1))).await) + } + None => None, } } @@ -254,7 +272,18 @@ fn watch_pid(pid: Pid, interval: Duration) { mod tests { use std::process::Command; - use sysinfo::PidExt; + use sysinfo::{Pid, PidExt}; + + #[tokio::test] + async fn watch_pid_if_needed() { + // Don't watch + assert!(super::watch_pid_if_needed(None).await.is_none()); + // Watch + super::watch_pid_if_needed(Some(Pid::from(0))) + .await + .expect("should be some") + .expect("should be OK"); + } #[tokio::test] async fn watching_pid_zombie() { diff --git a/service/tests/server/mod.rs b/service/tests/server/mod.rs index db34ca1b..a1d406f5 100644 --- a/service/tests/server/mod.rs +++ b/service/tests/server/mod.rs @@ -118,13 +118,7 @@ impl TestServer { where S: PostService, { - ServiceClient::new( - format!("http://{}", self.addr), - std::time::Duration::from_secs(1), - None, - service, - ) - .unwrap() + ServiceClient::new(format!("http://{}", self.addr), None, service).unwrap() } pub async fn generate_proof( diff --git a/service/tests/test_client.rs b/service/tests/test_client.rs index eb8c659e..15bc4e68 100644 --- a/service/tests/test_client.rs +++ b/service/tests/test_client.rs @@ -27,7 +27,7 @@ use server::{TestNodeRequest, TestServer}; async fn test_registers() { let mut test_server = TestServer::new().await; let client = test_server.create_client(Arc::new(MockPostService::new())); - let client_handle = tokio::spawn(client.run()); + let client_handle = tokio::spawn(client.run(None, std::time::Duration::from_secs(1))); // Check if client registered test_server.connected.recv().await.unwrap(); @@ -45,7 +45,7 @@ async fn test_gen_proof_in_progress() { .returning(|_| Ok(ProofGenState::InProgress)); let service = Arc::new(service); let client = test_server.create_client(service.clone()); - let client_handle = tokio::spawn(client.run()); + let client_handle = tokio::spawn(client.run(None, std::time::Duration::from_secs(1))); let connected = test_server.connected.recv().await.unwrap(); let response = TestServer::generate_proof(&connected, vec![0xCA; 32]).await; @@ -74,7 +74,7 @@ async fn test_gen_proof_failed() { let service = Arc::new(service); let client = test_server.create_client(service.clone()); - let client_handle = tokio::spawn(client.run()); + let client_handle = tokio::spawn(client.run(None, std::time::Duration::from_secs(1))); let connected = test_server.connected.recv().await.unwrap(); let response = TestServer::generate_proof(&connected, vec![0xCA; 32]).await; @@ -137,7 +137,7 @@ async fn test_gen_proof_finished() { let service = Arc::new(service); let client = test_server.create_client(service.clone()); - let client_handle = tokio::spawn(client.run()); + let client_handle = tokio::spawn(client.run(None, std::time::Duration::from_secs(1))); let connected = test_server.connected.recv().await.unwrap(); @@ -191,7 +191,7 @@ async fn test_broken_request_no_kind() { let service = Arc::new(service); let client = test_server.create_client(service.clone()); - let client_handle = tokio::spawn(client.run()); + let client_handle = tokio::spawn(client.run(None, std::time::Duration::from_secs(1))); let connected = test_server.connected.recv().await.unwrap(); @@ -262,7 +262,7 @@ async fn test_get_metadata(#[case] vrf_difficulty: Option<[u8; 32]>) { .unwrap(); let client = test_server.create_client(Arc::new(service)); - let client_handle = tokio::spawn(client.run()); + let client_handle = tokio::spawn(client.run(None, std::time::Duration::from_secs(1))); let connected = test_server.connected.recv().await.unwrap(); let response = TestServer::request_metadata(&connected).await; From 56f90a46e7a6f83c80d7bc812f4eefebd6a3b1ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Thu, 23 Nov 2023 09:43:35 +0700 Subject: [PATCH 3/4] Add UT to test max retries --- certifier/tests/test_certify.rs | 2 +- service/src/client.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/certifier/tests/test_certify.rs b/certifier/tests/test_certify.rs index 53023479..4e79827b 100644 --- a/certifier/tests/test_certify.rs +++ b/certifier/tests/test_certify.rs @@ -51,7 +51,7 @@ async fn test_certificate_post_proof() { // Spawn the certifier service let signer = SigningKey::generate(&mut rand::rngs::OsRng); let app = certifier::certifier::new(cfg, init_cfg, signer); - let server = axum::Server::bind(&"127.0.0.1:0".parse().unwrap()).serve(app.into_make_service()); + let server = axum::Server::bind(&([127, 0, 0, 1], 0).into()).serve(app.into_make_service()); let addr = server.local_addr(); tokio::spawn(server); diff --git a/service/src/client.rs b/service/src/client.rs index af4923e9..7ed1ebe5 100644 --- a/service/src/client.rs +++ b/service/src/client.rs @@ -274,6 +274,8 @@ fn convert_metadata(meta: PostMetadata) -> spacemesh_v1::Metadata { #[cfg(test)] mod tests { + use std::time::Duration; + use tonic::transport::{Certificate, Identity}; #[test] @@ -294,4 +296,17 @@ mod tests { ) .unwrap(); } + + #[tokio::test] + async fn gives_up_after_max_retries() { + let client = super::ServiceClient::new( + "http://localhost:1234".to_string(), + None, + super::MockPostService::new(), + ) + .unwrap(); + + let res = client.run(Some(2), Duration::from_millis(1)).await; + assert_eq!(res.unwrap_err().to_string(), "max retries (2) reached"); + } } From c1efa571655073eb9ad140377fc90643c57c8ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bartosz=20R=C3=B3=C5=BCa=C5=84ski?= Date: Thu, 23 Nov 2023 09:45:56 +0700 Subject: [PATCH 4/4] Run certifier and service UTs in CI --- .github/workflows/ci.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5bc8dedf..d4a368ec 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,9 @@ jobs: toolchain: stable steps: + - uses: arduino/setup-protoc@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} - uses: actions/checkout@v3 with: submodules: true @@ -64,8 +67,8 @@ jobs: unzip -j OpenCL-SDK-v2023.04.17-Win-x64.zip OpenCL-SDK-v2023.04.17-Win-x64/lib/OpenCL.lib - uses: Swatinem/rust-cache@v2 - - name: Test post crate - run: cargo test --all-features --release + - name: Tests + run: cargo test --all-features --release -p post-rs -p certifier -p service env: RUSTFLAGS: ${{ matrix.rustflags }} # https://github.com/tevador/RandomX/issues/262