Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 36 additions & 37 deletions .github/workflows/native-bazel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,42 +60,41 @@ jobs:
fi
shell: bash

# FIXME(palfrey): Can't make this reliably run in CI
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

# redis-store-tester:
# name: Redis store tester
# runs-on: ubuntu-24.04
# timeout-minutes: 30
# services:
# redis:
# image: redis:8.0.5-alpine3.21
# options: >-
# --health-cmd "redis-cli ping"
# --health-interval 10s
# --health-timeout 5s
# --health-retries 5
# ports:
# - 6379:6379
# steps:
# - name: Checkout
# uses: >- # v4.2.2
# actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
redis-store-tester:
name: Redis store tester
runs-on: ubuntu-24.04
timeout-minutes: 30
services:
redis:
image: redis:8.0.5-alpine3.21
options: >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
ports:
- 6379:6379
steps:
- name: Checkout
uses: >- # v4.2.2
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

# - name: Setup Bazel
# uses: >- # v0.13.0
# bazel-contrib/setup-bazel@663f88d97adf17db2523a5b385d9407a562e5551
# with:
# bazelisk-cache: true
# repository-cache: true
# disk-cache: ${{ github.workflow }}-ubuntu-24.04
- name: Setup Bazel
uses: >- # v0.13.0
bazel-contrib/setup-bazel@663f88d97adf17db2523a5b385d9407a562e5551
with:
bazelisk-cache: true
repository-cache: true
disk-cache: ${{ github.workflow }}-ubuntu-24.04

# - name: Run Bazel tests
# run: |
# bazel run //:redis_store_tester \
# --extra_toolchains=@rust_toolchains//:all \
# --verbose_failures
# env:
# RUST_LOG: trace
# REDIS_HOST: localhost
# MAX_REDIS_PERMITS: 50 # because CI times out sometimes
# MAX_LOOPS: 10000 # Not reliably running above this sort of level (possible low memory?)
# shell: bash
- name: Run Bazel tests
run: |
bazel run //:redis_store_tester \
--extra_toolchains=@rust_toolchains//:all \
--verbose_failures
env:
RUST_LOG: trace
REDIS_HOST: localhost
MAX_REDIS_PERMITS: 50 # because CI times out sometimes
MAX_LOOPS: 10000 # Not reliably running above this sort of level (possible low memory?)
shell: bash
2 changes: 1 addition & 1 deletion nativelink-store/src/redis_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ impl RedisStore {
async fn get_client(&'_ self) -> Result<ClientWithPermit<'_>, Error> {
let client = self.client_pool.next();
let config = client.client_config();
if config.mocks.is_none() {
if config.mocks.is_none() && !client.is_connected() {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means Fred isn't logging every time we call this

client.wait_for_connect().await.err_tip(||
format!(
"Connection issue connecting to redis server with hosts: {:?}, username: {}, database: {}",
Expand Down
1 change: 0 additions & 1 deletion nativelink-util/src/telemetry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ fn otlp_filter() -> EnvFilter {
.add_directive(expect_parse("h2=off"))
.add_directive(expect_parse("reqwest=off"))
.add_directive(expect_parse("tower=off"))
.add_directive(expect_parse("fred=off"))
}

// Create a tracing layer intended for stdout printing.
Expand Down
15 changes: 13 additions & 2 deletions src/bin/redis_store_tester.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use core::sync::atomic::{AtomicUsize, Ordering};
use std::borrow::Cow;
use std::env;
use std::sync::{Arc, RwLock};
use std::{env, thread, usize};

use bytes::Bytes;
use nativelink_config::stores::RedisSpec;
Expand Down Expand Up @@ -86,11 +86,20 @@ fn main() -> Result<(), Box<dyn core::error::Error>> {
.unwrap_or_else(|_| "2000000".to_string())
.parse()?;

// Cap the max threads at 1/2 of the system max allowed, so Redis still has some
// CPU available. Make sure we've got at least one available!
let max_threads = {
let raw = thread::available_parallelism()?;
let halved = raw.get() / 2;
halved.clamp(1, usize::MAX)
};

#[expect(
clippy::disallowed_methods,
reason = "`We need `tokio::runtime::Runtime::block_on` so we can get errors _after_ threads finished"
)]
tokio::runtime::Builder::new_multi_thread()
.worker_threads(max_threads)
.enable_all()
.build()
.unwrap()
Expand All @@ -100,6 +109,8 @@ fn main() -> Result<(), Box<dyn core::error::Error>> {
.await?
.expect("Init tracing should work");

info!(max_threads, "Starting runner");

let spec = RedisSpec {
addresses: vec![format!("redis://{redis_host}:6379/")],
connection_timeout_ms: 1000,
Expand Down Expand Up @@ -174,7 +185,7 @@ fn main() -> Result<(), Box<dyn core::error::Error>> {

let res = store_clone.get_and_decode(data.clone()).await?;
if let Some(existing_data) = res {
data.version = existing_data.version + 1;
data.version = existing_data.version;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't realize we tried to hack optimistic locking here. This change should prevent race conditions if the version was already up to date. Have you verified that this does not break the update logic of your tester?

}

store_clone.update_data(data).await?;
Expand Down
Loading