From c715a962eb5f0c8bcc2d997127fd1a2014097832 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 22 Oct 2025 15:17:32 -0700 Subject: [PATCH] chore(misc): update datasets, benchmarks to use alpha, beta prefixes --- llama_stack/core/server/server.py | 4 +++- tests/integration/datasets/test_datasets.py | 10 +++++----- tests/integration/eval/test_eval.py | 14 +++++++------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py index dd21a72f9d..845686f159 100644 --- a/llama_stack/core/server/server.py +++ b/llama_stack/core/server/server.py @@ -174,7 +174,9 @@ def __init__(self, config: StackRunConfig, *args, **kwargs): @asynccontextmanager async def lifespan(app: StackApp): - logger.info("Starting up") + server_version = parse_version("llama-stack") + + logger.info(f"Starting up Llama Stack server (version: {server_version})") assert app.stack is not None app.stack.create_registry_refresh_task() yield diff --git a/tests/integration/datasets/test_datasets.py b/tests/integration/datasets/test_datasets.py index 60db95f301..3ad5570f07 100644 --- a/tests/integration/datasets/test_datasets.py +++ b/tests/integration/datasets/test_datasets.py @@ -78,18 +78,18 @@ def data_url_from_file(file_path: str) -> str: ], ) def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit): - dataset = llama_stack_client.datasets.register( + dataset = llama_stack_client.beta.datasets.register( purpose=purpose, source=source, ) assert dataset.identifier is not None assert dataset.provider_id == provider_id - iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit) + iterrow_response = llama_stack_client.beta.datasets.iterrows(dataset.identifier, limit=limit) assert len(iterrow_response.data) == limit - dataset_list = llama_stack_client.datasets.list() + dataset_list = llama_stack_client.beta.datasets.list() assert dataset.identifier in [d.identifier for d in dataset_list] - llama_stack_client.datasets.unregister(dataset.identifier) - dataset_list = llama_stack_client.datasets.list() + llama_stack_client.beta.datasets.unregister(dataset.identifier) + dataset_list = llama_stack_client.beta.datasets.list() assert dataset.identifier not in [d.identifier for d in dataset_list] diff --git a/tests/integration/eval/test_eval.py b/tests/integration/eval/test_eval.py index 01581e8296..98b3302e09 100644 --- a/tests/integration/eval/test_eval.py +++ b/tests/integration/eval/test_eval.py @@ -17,17 +17,17 @@ @pytest.mark.parametrize("scoring_fn_id", ["basic::equality"]) def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id): - dataset = llama_stack_client.datasets.register( + dataset = llama_stack_client.beta.datasets.register( purpose="eval/messages-answer", source={ "type": "uri", "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"), }, ) - response = llama_stack_client.datasets.list() + response = llama_stack_client.beta.datasets.list() assert any(x.identifier == dataset.identifier for x in response) - rows = llama_stack_client.datasets.iterrows( + rows = llama_stack_client.beta.datasets.iterrows( dataset_id=dataset.identifier, limit=3, ) @@ -37,12 +37,12 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id): scoring_fn_id, ] benchmark_id = str(uuid.uuid4()) - llama_stack_client.benchmarks.register( + llama_stack_client.alpha.benchmarks.register( benchmark_id=benchmark_id, dataset_id=dataset.identifier, scoring_functions=scoring_functions, ) - list_benchmarks = llama_stack_client.benchmarks.list() + list_benchmarks = llama_stack_client.alpha.benchmarks.list() assert any(x.identifier == benchmark_id for x in list_benchmarks) response = llama_stack_client.alpha.eval.evaluate_rows( @@ -66,7 +66,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id): @pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"]) def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id): - dataset = llama_stack_client.datasets.register( + dataset = llama_stack_client.beta.datasets.register( purpose="eval/messages-answer", source={ "type": "uri", @@ -74,7 +74,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id): }, ) benchmark_id = str(uuid.uuid4()) - llama_stack_client.benchmarks.register( + llama_stack_client.alpha.benchmarks.register( benchmark_id=benchmark_id, dataset_id=dataset.identifier, scoring_functions=[scoring_fn_id],