Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added tests/catalog/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions tests/catalog/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from __future__ import annotations

import pytest

from tests.core.runner import env


@pytest.fixture(scope="session")
def cluster_template() -> str:
return env("OSAC_CLUSTER_TEMPLATE", "osac.templates.ocp_4_17_small")
108 changes: 108 additions & 0 deletions tests/catalog/test_catalog_item_lifecycle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from __future__ import annotations

import subprocess
import time
from uuid import uuid4

import pytest

from tests.core.grpc_client import GRPCClient
from tests.core.osac_cli import OsacCLI


def _unique_name(prefix: str) -> str:
return f"{prefix}-{uuid4().hex[:8]}"


def _wait_cluster_removed(grpc: GRPCClient, cluster_id: str, timeout: int = 30) -> None:
deadline = time.monotonic() + timeout
while cluster_id in grpc.list_cluster_ids():
if time.monotonic() > deadline:
break
time.sleep(2)
Comment on lines +17 to +22

@coderabbitai coderabbitai Bot Jun 16, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚖️ Poor tradeoff

Missing timeout exception masks cluster removal failures.

_wait_cluster_removed breaks silently when the deadline is exceeded without raising an exception. This allows teardown to proceed to catalog item deletion even when the cluster is still registered, causing downstream gRPC referential integrity failures that mask the actual removal problem.

Risk severity: Major — Silent timeouts produce misleading test failures and can leave orphaned catalog items when the real issue is cluster deletion/deregistration lag.

🛡️ Proposed fix to raise TimeoutError on deadline breach
 def _wait_cluster_removed(grpc: GRPCClient, cluster_id: str, timeout: int = 30) -> None:
     deadline = time.monotonic() + timeout
     while cluster_id in grpc.list_cluster_ids():
         if time.monotonic() > deadline:
-            break
+            raise TimeoutError(f"Cluster {cluster_id} still present after {timeout}s")
         time.sleep(2)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/catalog/test_catalog_item_lifecycle.py` around lines 17 - 22, The
`_wait_cluster_removed` function silently returns when the deadline is exceeded
without raising an exception, allowing test teardown to proceed even when the
cluster is still registered. After the while loop exits, add a check to
determine if the cluster_id is still present in grpc.list_cluster_ids(). If the
cluster remains registered after the deadline has passed, raise a TimeoutError
with a descriptive message instead of returning silently. This ensures that
cluster removal failures are properly surfaced rather than masked by downstream
referential integrity errors during catalog item deletion.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added to #70

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alosadagrande, thanks for tracking it in #70. The silent break will stay a potential source of confusing failures until that's addressed — worth keeping an eye on it.


🧠 Learnings used
Learnt from: tzvatot
Repo: osac-project/osac-test-infra PR: 44
File: tests/catalog/test_catalog_item_lifecycle.py:52-54
Timestamp: 2026-06-16T11:30:30.149Z
Learning: When reviewing Python E2E tests that use `GRPCClient.call_unchecked()` (from `tests/core/grpc_client.py` in this repo), allow assertions that match on the returned gRPC `output` string (e.g., substring/regex checks) and do not flag them as “fragile” solely on that basis. `call_unchecked()` only returns `(output: str, exit_code: int)` and does not provide structured gRPC status/error codes from the response, so string matching is the only viable assertion strategy for these tests. (You may still flag other issues, but not the string-based assertion fragility claim without acknowledging this constraint.)



def test_catalog_item_crud(grpc: GRPCClient, cluster_template: str) -> None:
name = _unique_name("e2e-cat")
catalog_item_id = grpc.create_cluster_catalog_item(name=name, template=cluster_template, published=True)
try:
assert catalog_item_id in grpc.list_cluster_catalog_item_ids()

item = grpc.get_cluster_catalog_item(catalog_item_id=catalog_item_id)
obj = item["object"]
assert obj["title"] == name
assert obj["template"] == cluster_template
assert obj["published"] is True

grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)
catalog_item_id = ""

assert catalog_item_id not in grpc.list_cluster_catalog_item_ids()
Comment on lines +37 to +40

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

Deletion verification checks wrong value (always passes).

Line 38 sets catalog_item_id = "" immediately after deletion, then line 40 asserts that this empty string is not in the catalog item list. This assertion always passes regardless of whether deletion succeeded, because an empty string will never appear in the ID list.

Risk severity: Critical — The test cannot detect catalog item deletion failures, allowing broken delete operations to pass silently and accumulate leaked resources.

🐛 Proposed fix to verify actual catalog item ID
         grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)
+        deleted_id = catalog_item_id
         catalog_item_id = ""
 
-        assert catalog_item_id not in grpc.list_cluster_catalog_item_ids()
+        assert deleted_id not in grpc.list_cluster_catalog_item_ids()
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@tests/catalog/test_catalog_item_lifecycle.py` around lines 37 - 40, The test
in test_catalog_item_lifecycle.py has a critical logic error where
catalog_item_id is set to an empty string on line 38 immediately after calling
grpc.delete_cluster_catalog_item(), and then line 40 asserts that this empty
string is not in the catalog item list. This assertion always passes regardless
of whether the deletion succeeded, because an empty string will never be in the
ID list. To fix this, move the assertion on line 40 to execute before line 38
reassigns catalog_item_id to an empty string, so the assertion actually verifies
that the original catalog_item_id value has been removed from the list returned
by grpc.list_cluster_catalog_item_ids().

finally:
if catalog_item_id:
grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)


def test_unpublished_catalog_item_not_visible_in_public_api(grpc: GRPCClient, cluster_template: str) -> None:
name = _unique_name("e2e-unpub")
catalog_item_id = grpc.create_cluster_catalog_item(name=name, template=cluster_template, published=False)
try:
assert catalog_item_id not in grpc.list_cluster_catalog_item_ids()

output, rc = grpc.call_unchecked(service="osac.public.v1.ClusterCatalogItems/Get", data={"id": catalog_item_id})
assert rc != 0, f"Expected Get to fail for unpublished item, got: {output}"
assert "not published" in output.lower() or "not found" in output.lower()
Comment thread
coderabbitai[bot] marked this conversation as resolved.
finally:
grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)


def test_create_cluster_with_catalog_item(grpc: GRPCClient, cli: OsacCLI, cluster_template: str) -> None:
name = _unique_name("e2e-cat")
catalog_item_id = grpc.create_cluster_catalog_item(name=name, template=cluster_template, published=True)
cluster_id = ""
try:
cluster_name = _unique_name("e2e-cluster")
cluster_id = cli.create_cluster_with_catalog_item(catalog_item=catalog_item_id, name=cluster_name)

assert cluster_id in grpc.list_cluster_ids()

cluster = grpc.get_cluster(cluster_id=cluster_id)
assert cluster["object"]["spec"]["catalogItem"] == catalog_item_id
finally:
if cluster_id:
cli.delete_cluster(uuid=cluster_id)
_wait_cluster_removed(grpc, cluster_id)
grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)
Comment thread
tzvatot marked this conversation as resolved.


def test_create_cluster_with_unpublished_catalog_item_fails(
grpc: GRPCClient, cli: OsacCLI, cluster_template: str
) -> None:
name = _unique_name("e2e-unpub")
catalog_item_id = grpc.create_cluster_catalog_item(name=name, template=cluster_template, published=False)
try:
cluster_name = _unique_name("e2e-cluster")
with pytest.raises(subprocess.CalledProcessError):
cli.create_cluster_with_catalog_item(catalog_item=catalog_item_id, name=cluster_name)
finally:
grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)


@pytest.mark.xfail(reason="Server does not yet block deletion of referenced catalog items")
def test_delete_catalog_item_blocked_when_referenced(grpc: GRPCClient, cli: OsacCLI, cluster_template: str) -> None:
name = _unique_name("e2e-ref")
catalog_item_id = grpc.create_cluster_catalog_item(name=name, template=cluster_template, published=True)
cluster_id = ""
try:
cluster_name = _unique_name("e2e-cluster")
cluster_id = cli.create_cluster_with_catalog_item(catalog_item=catalog_item_id, name=cluster_name)

output, rc = grpc.call_unchecked(
service="osac.private.v1.ClusterCatalogItems/Delete", data={"id": catalog_item_id}
)
assert rc != 0, f"Expected delete to be blocked, got: {output}"
finally:
if cluster_id:
cli.delete_cluster(uuid=cluster_id)
_wait_cluster_removed(grpc, cluster_id)
grpc.delete_cluster_catalog_item(catalog_item_id=catalog_item_id)
33 changes: 30 additions & 3 deletions tests/core/grpc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import subprocess
from typing import Any

from tests.core.runner import run
from tests.core.runner import run, run_unchecked

PUBLIC_API: str = "osac.public.v1"
PRIVATE_API: str = "osac.private.v1"
Expand All @@ -16,12 +16,15 @@ def __init__(self, *, address: str, token: str) -> None:
self.address: str = address
self.token: str = token

def call(self, *, service: str, data: dict[str, Any] | None = None) -> dict[str, Any]:
def _build_args(self, *, service: str, data: dict[str, Any] | None = None) -> list[str]:
args: list[str] = ["grpcurl", "-insecure", "-H", f"Authorization: Bearer {self.token}"]
if data is not None:
args.extend(["-d", json.dumps(data)])
args.extend([self.address, service])
return json.loads(run(*args))
return args

def call(self, *, service: str, data: dict[str, Any] | None = None) -> dict[str, Any]:
return json.loads(run(*self._build_args(service=service, data=data)))

def list_compute_instance_ids(self) -> list[str]:
response: dict[str, Any] = self.call(service=f"{PUBLIC_API}.ComputeInstances/List")
Expand Down Expand Up @@ -87,6 +90,9 @@ def list_subnet_ids(self) -> list[str]:
def delete_subnet(self, *, subnet_id: str) -> None:
self.call(service=f"{PUBLIC_API}.Subnets/Delete", data={"id": subnet_id})

def call_unchecked(self, *, service: str, data: dict[str, Any] | None = None) -> tuple[str, int]:
return run_unchecked(*self._build_args(service=service, data=data))

# Cluster operations

def list_cluster_ids(self) -> list[str]:
Expand Down Expand Up @@ -203,3 +209,24 @@ def list_public_ip_attachment_ids(self) -> list[str]:

def delete_public_ip_attachment(self, *, attachment_id: str) -> None:
self.call(service=f"{PUBLIC_API}.PublicIPAttachments/Delete", data={"id": attachment_id})

# ClusterCatalogItem operations

def create_cluster_catalog_item(
self, *, name: str, template: str, published: bool = True, field_definitions: list[dict[str, Any]] | None = None
) -> str:
obj: dict[str, Any] = {"metadata": {"name": name}, "title": name, "template": template, "published": published}
if field_definitions is not None:
obj["field_definitions"] = field_definitions
response: dict[str, Any] = self.call(service=f"{PRIVATE_API}.ClusterCatalogItems/Create", data={"object": obj})
return response["object"]["id"]

def get_cluster_catalog_item(self, *, catalog_item_id: str) -> dict[str, Any]:
return self.call(service=f"{PUBLIC_API}.ClusterCatalogItems/Get", data={"id": catalog_item_id})

def list_cluster_catalog_item_ids(self) -> list[str]:
response: dict[str, Any] = self.call(service=f"{PUBLIC_API}.ClusterCatalogItems/List")
return [item["id"] for item in response.get("items", [])]

def delete_cluster_catalog_item(self, *, catalog_item_id: str) -> None:
self.call(service=f"{PRIVATE_API}.ClusterCatalogItems/Delete", data={"id": catalog_item_id})
19 changes: 11 additions & 8 deletions tests/core/osac_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ def __init__(self, *, binary: str, address: str, token_script: str, namespace: s
def relogin(self) -> None:
run(self.binary, "login", "--address", self._address, "--insecure", "--token-script", self._token_script)

@staticmethod
def _parse_uuid(stdout: str) -> str:
match: re.Match[str] | None = re.search(r"'([^']+)'", stdout)
assert match is not None, f"Failed to parse UUID from CLI output: {stdout}"
return match.group(1)

def create_hub(self, *, hub_id: str, kubeconfig: str) -> None:
run(self.binary, "create", "hub", "--id", hub_id, "--kubeconfig", kubeconfig, "--namespace", self.namespace)

Expand Down Expand Up @@ -79,10 +85,7 @@ def create_compute_instance(
if user_data_secret_ref is not None:
args.extend(["--user-data", user_data_secret_ref])

stdout: str = run(*args)
match: re.Match[str] | None = re.search(r"'([^']+)'", stdout)
assert match is not None, f"Failed to parse UUID from CLI output: {stdout}"
return match.group(1)
return self._parse_uuid(run(*args))

def delete_compute_instance(self, *, uuid: str) -> None:
run(self.binary, "delete", "computeinstance", uuid)
Expand Down Expand Up @@ -111,10 +114,7 @@ def create_cluster(
for key, path in template_parameter_files.items():
args.extend(["-f", f"{key}={path}"])

stdout: str = run(*args)
match: re.Match[str] | None = re.search(r"'([^']+)'", stdout)
assert match is not None, f"Failed to parse UUID from CLI output: {stdout}"
return match.group(1)
return self._parse_uuid(run(*args))

def get(self, resource: str, *, output: str | None = None) -> str:
args: list[str] = [self.binary, "get", resource]
Expand All @@ -128,5 +128,8 @@ def get_cluster_credential(self, credential: str, *, uuid: str) -> str:
def get_unchecked(self, resource: str) -> tuple[str, int]:
return run_unchecked(self.binary, "get", resource)

def create_cluster_with_catalog_item(self, *, catalog_item: str, name: str) -> str:
return self._parse_uuid(run(self.binary, "create", "cluster", "--catalog-item", catalog_item, "--name", name))

def delete_cluster(self, *, uuid: str) -> None:
run(self.binary, "delete", "cluster", uuid)
Loading