diff --git a/.github/workflows/native-bazel.yaml b/.github/workflows/native-bazel.yaml index 13f1844b5..1edf23c1f 100644 --- a/.github/workflows/native-bazel.yaml +++ b/.github/workflows/native-bazel.yaml @@ -60,42 +60,41 @@ jobs: fi shell: bash - # FIXME(palfrey): Can't make this reliably run in CI - # redis-store-tester: - # name: Redis store tester - # runs-on: ubuntu-24.04 - # timeout-minutes: 30 - # services: - # redis: - # image: redis:8.0.5-alpine3.21 - # options: >- - # --health-cmd "redis-cli ping" - # --health-interval 10s - # --health-timeout 5s - # --health-retries 5 - # ports: - # - 6379:6379 - # steps: - # - name: Checkout - # uses: >- # v4.2.2 - # actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + redis-store-tester: + name: Redis store tester + runs-on: ubuntu-24.04 + timeout-minutes: 30 + services: + redis: + image: redis:8.0.5-alpine3.21 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 6379:6379 + steps: + - name: Checkout + uses: >- # v4.2.2 + actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - # - name: Setup Bazel - # uses: >- # v0.13.0 - # bazel-contrib/setup-bazel@663f88d97adf17db2523a5b385d9407a562e5551 - # with: - # bazelisk-cache: true - # repository-cache: true - # disk-cache: ${{ github.workflow }}-ubuntu-24.04 + - name: Setup Bazel + uses: >- # v0.13.0 + bazel-contrib/setup-bazel@663f88d97adf17db2523a5b385d9407a562e5551 + with: + bazelisk-cache: true + repository-cache: true + disk-cache: ${{ github.workflow }}-ubuntu-24.04 - # - name: Run Bazel tests - # run: | - # bazel run //:redis_store_tester \ - # --extra_toolchains=@rust_toolchains//:all \ - # --verbose_failures - # env: - # RUST_LOG: trace - # REDIS_HOST: localhost - # MAX_REDIS_PERMITS: 50 # because CI times out sometimes - # MAX_LOOPS: 10000 # Not reliably running above this sort of level (possible low memory?) 
- # shell: bash + - name: Run Bazel tests + run: | + bazel run //:redis_store_tester \ + --extra_toolchains=@rust_toolchains//:all \ + --verbose_failures + env: + RUST_LOG: trace + REDIS_HOST: localhost + MAX_REDIS_PERMITS: 50 # because CI times out sometimes + MAX_LOOPS: 10000 # Not reliably running above this sort of level (possible low memory?) + shell: bash diff --git a/nativelink-store/src/redis_store.rs b/nativelink-store/src/redis_store.rs index 755e42236..2f4068ad3 100644 --- a/nativelink-store/src/redis_store.rs +++ b/nativelink-store/src/redis_store.rs @@ -332,7 +332,7 @@ impl RedisStore { async fn get_client(&'_ self) -> Result, Error> { let client = self.client_pool.next(); let config = client.client_config(); - if config.mocks.is_none() { + if config.mocks.is_none() && !client.is_connected() { client.wait_for_connect().await.err_tip(|| format!( "Connection issue connecting to redis server with hosts: {:?}, username: {}, database: {}", diff --git a/nativelink-util/src/telemetry.rs b/nativelink-util/src/telemetry.rs index d05c1eedb..eebcc9219 100644 --- a/nativelink-util/src/telemetry.rs +++ b/nativelink-util/src/telemetry.rs @@ -67,7 +67,6 @@ fn otlp_filter() -> EnvFilter { .add_directive(expect_parse("h2=off")) .add_directive(expect_parse("reqwest=off")) .add_directive(expect_parse("tower=off")) - .add_directive(expect_parse("fred=off")) } // Create a tracing layer intended for stdout printing. 
diff --git a/src/bin/redis_store_tester.rs b/src/bin/redis_store_tester.rs index 82f5aa57e..116fbbfeb 100644 --- a/src/bin/redis_store_tester.rs +++ b/src/bin/redis_store_tester.rs @@ -1,7 +1,7 @@ use core::sync::atomic::{AtomicUsize, Ordering}; use std::borrow::Cow; -use std::env; use std::sync::{Arc, RwLock}; +use std::{env, thread}; use bytes::Bytes; use nativelink_config::stores::RedisSpec; @@ -86,11 +86,20 @@ fn main() -> Result<(), Box> { .unwrap_or_else(|_| "2000000".to_string()) .parse()?; + // Cap the worker threads at half of the available parallelism so Redis still has + // some CPU to itself, while always keeping at least one worker thread. + let max_threads = { + let raw = thread::available_parallelism()?; + let halved = raw.get() / 2; + halved.max(1) + }; + #[expect( clippy::disallowed_methods, reason = "`We need `tokio::runtime::Runtime::block_on` so we can get errors _after_ threads finished" )] tokio::runtime::Builder::new_multi_thread() + .worker_threads(max_threads) .enable_all() .build() .unwrap() @@ -100,6 +109,8 @@ fn main() -> Result<(), Box> { .await? .expect("Init tracing should work"); + info!(max_threads, "Starting runner"); + let spec = RedisSpec { addresses: vec![format!("redis://{redis_host}:6379/")], connection_timeout_ms: 1000, @@ -174,7 +185,7 @@ fn main() -> Result<(), Box> { let res = store_clone.get_and_decode(data.clone()).await?; if let Some(existing_data) = res { - data.version = existing_data.version + 1; + data.version = existing_data.version; } store_clone.update_data(data).await?;