From b48b74f8c265b9deaa91f0355ef78cd5ba68811d Mon Sep 17 00:00:00 2001 From: ivanevtimov Date: Tue, 9 Dec 2025 05:27:22 -0800 Subject: [PATCH 1/5] Pluginize docker --- docs/README.md | 1 + docs/docker_backends.md | 161 +++ pyproject.toml | 3 + src/prompt_siren/registry_base.py | 8 +- .../sandbox_managers/docker/__init__.py | 7 +- .../sandbox_managers/docker/contexts.py | 77 +- .../sandbox_managers/docker/exec_utils.py | 60 +- .../sandbox_managers/docker/image_cache.py | 108 +- .../sandbox_managers/docker/local_client.py | 293 +++++ .../sandbox_managers/docker/manager.py | 98 +- .../docker/plugins/__init__.py | 21 + .../docker/plugins/abstract.py | 235 ++++ .../sandbox_managers/docker/plugins/errors.py | 16 + .../docker/plugins/registry.py | 52 + tests/integration/conftest.py | 129 +- tests/integration/test_bash_env.py | 26 +- .../test_docker_manager_integration.py | 1041 ++++++++++++----- .../test_swebench_tools_integration.py | 12 +- .../docker/plugins/__init__.py | 1 + .../docker/plugins/test_local_client.py | 485 ++++++++ tests/sandbox_managers/docker/test_manager.py | 77 +- 21 files changed, 2447 insertions(+), 464 deletions(-) create mode 100644 docs/docker_backends.md create mode 100644 src/prompt_siren/sandbox_managers/docker/local_client.py create mode 100644 src/prompt_siren/sandbox_managers/docker/plugins/__init__.py create mode 100644 src/prompt_siren/sandbox_managers/docker/plugins/abstract.py create mode 100644 src/prompt_siren/sandbox_managers/docker/plugins/errors.py create mode 100644 src/prompt_siren/sandbox_managers/docker/plugins/registry.py create mode 100644 tests/sandbox_managers/docker/plugins/__init__.py create mode 100644 tests/sandbox_managers/docker/plugins/test_local_client.py diff --git a/docs/README.md b/docs/README.md index 60d95ca..ef4ac26 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,6 +5,7 @@ Documentation for the Siren prompt injection research tool. 
## Getting Started - **[Configuration Guide](configuration.md)** - Basic usage and configuration +- **[Docker Backends](docker_backends.md)** - Running with Local Docker - **[Usage Limits](usage_limits.md)** - Resource limits and cost controls - **[Plugins](plugins/README.md)** - Adding custom agents, attacks, environments diff --git a/docs/docker_backends.md b/docs/docker_backends.md new file mode 100644 index 0000000..b853fdc --- /dev/null +++ b/docs/docker_backends.md @@ -0,0 +1,161 @@ +# Docker Execution Backends + +The Prompt Siren Workbench supports Docker execution backends for running containerized tasks. + +## Overview + +Docker execution backends handle the creation, management, and execution of Docker containers for sandbox environments. + +## Local Docker Backend + +The local Docker backend executes containers directly on your machine using the Docker daemon. + +### Requirements + +- Docker installed and running on your machine +- Docker socket accessible (typically `/var/run/docker.sock`) +- `DOCKER_HOST` environment variable set in your `.env` file + +### Setup + +1. **Install Docker** (if not already installed): + ```bash + docker --version + ``` + +2. **Configure environment** in `.env`: + ```bash + DOCKER_HOST="unix:///var/run/docker.sock" + ``` + +3. 
**Verify Docker is running**: + ```bash + docker ps + ``` + +### Usage + +Local Docker is the default backend and requires no special configuration: + +```bash +# Run with local Docker (default) +uv run --env-file .env prompt-siren run benign +dataset=swebench + +# Or explicitly specify +uv run --env-file .env prompt-siren run benign +dataset=swebench \ + sandbox_manager.config.docker_client=local +``` + +### Advantages + +- **Fast**: No network latency, containers run locally +- **Easy debugging**: Direct access to containers via `docker` CLI +- **No quotas**: Limited only by your machine's resources + +### Limitations + +- Requires Docker daemon running +- Limited by local machine resources +- Cannot run on machines without Docker + +## Hydra Configuration + +You can set the Docker backend in your Hydra configuration file or via command-line overrides. + +### In Configuration File + +Create or modify `config.yaml`: + +```yaml +defaults: + - _self_ + - dataset: swebench + +sandbox_manager: + type: local-docker + config: + docker_client: local +``` + +Then run without needing overrides: + +```bash +uv run --env-file .env prompt-siren run benign --config-dir=./config +``` + +### Via Command-Line Overrides + +Override the backend at runtime: + +```bash +# Explicitly use local Docker +uv run --env-file .env prompt-siren run benign +dataset=swebench \ + sandbox_manager.config.docker_client=local +``` + +## End-to-End Example + +Here's a complete example of running SWE-bench with Docker: + +```bash +# 1. 
Set up environment +cat > .env <=0.1.35"] swebench = ["swebench", "jinja2>=3.1.6"] diff --git a/src/prompt_siren/registry_base.py b/src/prompt_siren/registry_base.py index 8aa1a6d..f8dc8bd 100644 --- a/src/prompt_siren/registry_base.py +++ b/src/prompt_siren/registry_base.py @@ -212,6 +212,7 @@ def create_component( component_type: str, config: BaseModel | None, context: ContextT | None = None, + **kwargs: Any, ) -> ComponentT: """Create a component instance for a given component type and config. @@ -219,6 +220,7 @@ def create_component( component_type: String identifier for the component type config: Configuration object for the component, or None context: Optional context object passed to factory (e.g., sandbox_manager for datasets) + **kwargs: Additional keyword arguments passed to the factory function. Returns: An instance of the component type @@ -243,14 +245,14 @@ def create_component( config_class, factory = self._registry[component_type] - # Component doesn't use config - call factory with no args + # Component doesn't use config - call factory with only kwargs if config_class is None: if config is not None: raise ValueError( f"{self._component_name.title()} type '{component_type}' doesn't accept config, " f"but config was provided" ) - return factory() + return factory(**kwargs) # Component uses config - validate and call with config and context if config is None: @@ -260,7 +262,7 @@ def create_component( f"Config must be an instance of {config_class.__name__}, got {type(config).__name__}" ) - return factory(config, context) + return factory(config, context, **kwargs) def get_registered_components(self) -> list[str]: """Get a list of all registered component types. 
diff --git a/src/prompt_siren/sandbox_managers/docker/__init__.py b/src/prompt_siren/sandbox_managers/docker/__init__.py index c9a862a..dd9054f 100644 --- a/src/prompt_siren/sandbox_managers/docker/__init__.py +++ b/src/prompt_siren/sandbox_managers/docker/__init__.py @@ -7,4 +7,9 @@ DockerSandboxManager, ) -__all__ = ["DockerSandboxConfig", "DockerSandboxManager", "create_docker_sandbox_manager"] +__all__ = [ + # Sandbox manager + "DockerSandboxConfig", + "DockerSandboxManager", + "create_docker_sandbox_manager", +] diff --git a/src/prompt_siren/sandbox_managers/docker/contexts.py b/src/prompt_siren/sandbox_managers/docker/contexts.py index 1d2d86a..7d3302f 100644 --- a/src/prompt_siren/sandbox_managers/docker/contexts.py +++ b/src/prompt_siren/sandbox_managers/docker/contexts.py @@ -4,29 +4,24 @@ from __future__ import annotations import asyncio +import logging import uuid from dataclasses import dataclass, field from typing import Any -try: - import aiodocker - from aiodocker.containers import DockerContainer -except ImportError as e: - raise ImportError( - "Docker sandbox manager requires the 'docker' optional dependency. 
" - "Install with: pip install 'prompt-siren[docker]'" - ) from e - from ..sandbox_state import ContainerID, SandboxState from ..sandbox_task_setup import ContainerSetup, TaskSetup from .image_cache import ImageCache +from .plugins import AbstractContainer, AbstractDockerClient + +logger = logging.getLogger(__name__) @dataclass class ContainerInfo: """Information about a tracked container.""" - container: DockerContainer + container: AbstractContainer temp_image: str | None = None # Temporary image created during cloning @@ -43,7 +38,7 @@ class BatchState: """ batch_id: str - docker_client: aiodocker.Docker + docker_client: AbstractDockerClient image_cache: ImageCache contexts: dict[str, TaskSandboxContext] = field(default_factory=dict) _lock: asyncio.Lock = field(default_factory=asyncio.Lock) @@ -228,7 +223,7 @@ async def _create_network(self, task_setup: TaskSetup, network_enabled: bool) -> "Internal": False, } - network = await self.batch_state.docker_client.networks.create(config=network_config) + network = await self.batch_state.docker_client.create_network(config=network_config) network_info = await network.show() return network_info["Id"] @@ -242,7 +237,7 @@ async def _clone_network(self, source_network_id: str) -> str: New network ID """ # Get source network config - source_network = await self.batch_state.docker_client.networks.get(source_network_id) + source_network = await self.batch_state.docker_client.get_network(source_network_id) source_info = await source_network.show() # Create new network with same config @@ -253,7 +248,7 @@ async def _clone_network(self, source_network_id: str) -> str: "Internal": source_info.get("Internal", False), } - network = await self.batch_state.docker_client.networks.create(config=network_config) + network = await self.batch_state.docker_client.create_network(config=network_config) network_info = await network.show() return network_info["Id"] @@ -275,10 +270,16 @@ async def _create_single_container( Returns: Container ID 
""" + logger.debug( + f"Creating container for task_id={task_id}, container_name={container_setup.name}, " + f"network_id={network_id}, network_enabled={network_enabled}" + ) + # Get image tag from cache image_tag = await self.batch_state.image_cache.get_image_for_container( container_setup, task_id ) + logger.debug(f"Retrieved image tag from cache: {image_tag}") # Build container config config = self._build_container_config( @@ -287,6 +288,7 @@ async def _create_single_container( network_id=network_id, network_enabled=network_enabled, ) + logger.debug(f"Built container config: {config}") # Generate unique container name safe_task_id = task_id.replace(":", "-").replace("/", "-") @@ -295,26 +297,56 @@ async def _create_single_container( f"{self.batch_state.batch_id}-{self.execution_id}-" f"{container_setup.name}-{safe_task_id}-{unique_suffix}" ) + logger.debug(f"Generated container name: {container_name}") # Create and start container - container = await self.batch_state.docker_client.containers.create( + logger.debug(f"Creating container {container_name}...") + container = await self.batch_state.docker_client.create_container( config, name=container_name ) + logger.debug(f"Starting container {container_name}...") await container.start() container_id: ContainerID = (await container.show())["Id"] + logger.debug(f"Container started with ID: {container_id}") # Check if container exited immediately after starting container_info = await container.show() + logger.debug( + f"Container info after start: State={container_info.get('State')}, " + f"Config={container_info.get('Config')}, " + f"NetworkSettings={container_info.get('NetworkSettings')}" + ) + if not container_info["State"]["Running"]: exit_code = container_info["State"].get("ExitCode", "unknown") + logger.debug(f"Container Info {container_info}") + error_msg = container_info["State"].get("Error", "") + started_at = container_info["State"].get("StartedAt", "") + finished_at = 
container_info["State"].get("FinishedAt", "") + + logger.debug( + f"Container {container_name} exited immediately. " + f"Full container_info: {container_info}" + ) + # Get container logs to help diagnose the issue logs = await container.log(stdout=True, stderr=True) log_text = "".join(logs) if logs else "(no logs)" + + logger.debug( + f"Container {container_name} details - " + f"exit_code={exit_code}, error={error_msg}, " + f"started_at={started_at}, finished_at={finished_at}, " + f"logs={log_text}" + ) + # Clean up the failed container try: await container.delete() - except Exception: - pass # Best effort cleanup + logger.debug(f"Cleaned up failed container {container_name}") + except Exception as cleanup_error: + logger.debug(f"Failed to clean up container {container_name}: {cleanup_error}") + raise RuntimeError( f"Container {container_name} exited immediately after starting " f"with exit code {exit_code}. Logs:\n{log_text}" @@ -324,6 +356,7 @@ async def _create_single_container( async with self._lock: self._containers[container_id] = ContainerInfo(container=container, temp_image=None) + logger.debug(f"Successfully created and tracked container {container_name}") return container_id async def _clone_container( @@ -340,7 +373,7 @@ async def _clone_container( Returns: Cloned container ID """ - source_container = await self.batch_state.docker_client.containers.get(source_id) + source_container = await self.batch_state.docker_client.get_container(source_id) # Generate unique names clone_id = uuid.uuid4().hex[:8] @@ -387,7 +420,7 @@ async def _clone_container( } # Create and start cloned container - cloned_container = await self.batch_state.docker_client.containers.create( + cloned_container = await self.batch_state.docker_client.create_container( config=config, name=clone_name ) await cloned_container.start() @@ -412,7 +445,7 @@ async def _clone_container( pass # Best effort try: - await self.batch_state.docker_client.images.delete(temp_image_full, force=True) + 
await self.batch_state.docker_client.delete_image(temp_image_full, force=True) except Exception: pass # Best effort @@ -503,7 +536,7 @@ async def _cleanup_container(self, container_id: ContainerID) -> None: # Remove temporary image if exists if info.temp_image: try: - await self.batch_state.docker_client.images.delete(info.temp_image, force=True) + await self.batch_state.docker_client.delete_image(info.temp_image, force=True) except Exception: pass # Best effort cleanup @@ -514,7 +547,7 @@ async def _cleanup_network(self, network_id: str) -> None: network_id: Network ID to clean up """ try: - network = await self.batch_state.docker_client.networks.get(network_id) + network = await self.batch_state.docker_client.get_network(network_id) await network.delete() except Exception: pass # Best effort cleanup diff --git a/src/prompt_siren/sandbox_managers/docker/exec_utils.py b/src/prompt_siren/sandbox_managers/docker/exec_utils.py index 227cbfb..cb340ff 100644 --- a/src/prompt_siren/sandbox_managers/docker/exec_utils.py +++ b/src/prompt_siren/sandbox_managers/docker/exec_utils.py @@ -7,22 +7,14 @@ from pathlib import Path import anyio -from aiohttp import ClientTimeout -try: - import aiodocker -except ImportError as e: - raise ImportError( - "Docker sandbox manager requires the 'docker' optional dependency. " - "Install with: pip install 'prompt-siren[docker]'" - ) from e - -from ..abstract import ExecOutput, ExecTimeoutError, StderrChunk, StdoutChunk +from ..abstract import ExecOutput, ExecTimeoutError from ..sandbox_state import ContainerID +from .plugins import AbstractDockerClient async def exec_in_container( - docker: aiodocker.Docker, + docker: AbstractDockerClient, container_id: ContainerID, cmd: str | list[str], stdin: str | bytes | None = None, @@ -36,7 +28,7 @@ async def exec_in_container( """Execute a command in a Docker container. 
Args: - docker: Docker client + docker: Abstract Docker client container_id: Container ID to execute command in cmd: Command to execute (string or list of arguments) stdin: Optional stdin data to pass to the command @@ -54,7 +46,7 @@ async def exec_in_container( ExecTimeoutError: If command execution exceeds timeout """ # Get container - container = await docker.containers.get(container_id) + container = await docker.get_container(container_id) _shell_path = str(shell_path) if shell_path is not None else "/bin/bash" # Normalize command to bash -c format @@ -68,49 +60,15 @@ async def exec_in_container( try: # Use anyio for timeout (Python 3.10 compatible) with anyio.fail_after(timeout_value): - # Create exec instance - exec_instance = await container.exec( + # Execute command in container + return await container.exec( cmd=bash_cmd, - stdout=True, - stderr=True, - stdin=stdin is not None, + stdin=stdin, user=user or "", environment=env, workdir=cwd, + timeout=timeout_value, ) - # Start execution - stream = exec_instance.start(detach=False, timeout=ClientTimeout(total=timeout_value)) - - # Write stdin if provided - if stdin is not None: - if isinstance(stdin, str): - stdin = stdin.encode() - await stream.write_in(stdin) - # Signal EOF on stdin without closing stdout/stderr - # Access transport directly like write_in() does - assert stream._resp is not None - assert stream._resp.connection is not None - transport = stream._resp.connection.transport - if transport and transport.can_write_eof(): - transport.write_eof() - - # Read output - outputs = [] - while True: - msg = await stream.read_out() - if msg is None: - break - decoded = msg.data.decode("utf-8", errors="replace") - if msg.stream == 1: # stdout - outputs.append(StdoutChunk(content=decoded)) - elif msg.stream == 2: # stderr - outputs.append(StderrChunk(content=decoded)) - - # Get exit code - exit_code = (await exec_instance.inspect())["ExitCode"] - - return ExecOutput(outputs=outputs, exit_code=exit_code) - 
except TimeoutError as e: raise ExecTimeoutError(container_id, cmd, timeout_value) from e diff --git a/src/prompt_siren/sandbox_managers/docker/image_cache.py b/src/prompt_siren/sandbox_managers/docker/image_cache.py index 9cdacd1..c46e654 100644 --- a/src/prompt_siren/sandbox_managers/docker/image_cache.py +++ b/src/prompt_siren/sandbox_managers/docker/image_cache.py @@ -4,22 +4,12 @@ from __future__ import annotations import hashlib -import io -import tarfile +import logging import tempfile from pathlib import Path from typing_extensions import assert_never -try: - import aiodocker - from aiodocker import DockerError -except ImportError as e: - raise ImportError( - "Docker sandbox manager requires the 'docker' optional dependency. " - "Install with: pip install 'prompt-siren[docker]'" - ) from e - from ..image_spec import ( BuildImageSpec, ImageSpec, @@ -28,6 +18,9 @@ PullImageSpec, ) from ..sandbox_task_setup import ContainerSetup +from .plugins import AbstractDockerClient, DockerClientError + +logger = logging.getLogger(__name__) class ImageBuildError(Exception): @@ -46,11 +39,11 @@ class ImageCache: concurrent Docker build race conditions. """ - def __init__(self, docker: aiodocker.Docker, batch_id: str): + def __init__(self, docker: AbstractDockerClient, batch_id: str): """Initialize image cache. 
Args: - docker: Docker client for all operations + docker: Abstract Docker client for all operations batch_id: Unique identifier for this batch """ self._docker = docker @@ -68,13 +61,22 @@ async def ensure_all_base_images(self, container_setups: list[ContainerSetup]) - Args: container_setups: All container setups from all tasks in the batch """ + logger.debug( + f"[ImageCache] ensure_all_base_images called with {len(container_setups)} container setups" + ) # Collect unique image specs using cache key seen_specs: set[str] = set() for setup in container_setups: cache_key = self._get_cache_key(setup.spec.image_spec) + logger.debug( + f"[ImageCache] Processing setup '{setup.name}' with cache_key: {cache_key}" + ) if cache_key not in seen_specs: seen_specs.add(cache_key) + logger.debug(f"[ImageCache] Preparing base image for cache_key: {cache_key}") await self._prepare_base_image(setup.spec.image_spec) + else: + logger.debug(f"[ImageCache] Skipping duplicate cache_key: {cache_key}") async def get_image_for_container( self, @@ -115,23 +117,37 @@ async def _prepare_base_image(self, spec: ImageSpec) -> ImageTag: Image tag """ cache_key = self._get_cache_key(spec) + logger.debug(f"[ImageCache] _prepare_base_image called for cache_key: {cache_key}") # Check cache first if cache_key in self._base_image_cache: + logger.debug(f"[ImageCache] Found cached image for cache_key: {cache_key}") return self._base_image_cache[cache_key] + logger.debug( + f"[ImageCache] No cached image found, preparing new image for spec type: {type(spec).__name__}" + ) + + tag: ImageTag = "" + # Prepare image based on type match spec: case PullImageSpec(): + logger.debug(f"[ImageCache] Pulling image: {spec.tag}") tag = await self._pull_image(spec) case BuildImageSpec(): + logger.debug( + f"[ImageCache] Building image from spec: tag={spec.tag}, context={spec.context_path}, dockerfile={spec.dockerfile_path}" + ) tag = await self._build_image(spec) case MultiStageBuildImageSpec(): + 
logger.debug(f"[ImageCache] Building multi-stage image: {spec.final_tag}") tag = await self._build_multi_stage_image(spec) case _: assert_never(spec) # Cache result + logger.debug(f"[ImageCache] Caching result for cache_key: {cache_key}, tag: {tag}") self._base_image_cache[cache_key] = tag return tag @@ -209,12 +225,18 @@ async def _pull_image(self, spec: PullImageSpec) -> ImageTag: Returns: Image tag """ + logger.debug(f"[ImageCache] _pull_image: Checking if image exists locally: {spec.tag}") try: # Check if image exists locally - await self._docker.images.inspect(spec.tag) - except DockerError: + await self._docker.inspect_image(spec.tag) + logger.debug(f"[ImageCache] _pull_image: Image {spec.tag} already exists locally") + except DockerClientError as e: # Image doesn't exist, pull it - await self._docker.images.pull(from_image=spec.tag) + logger.debug( + f"[ImageCache] _pull_image: Image {spec.tag} not found locally (error: {e}), pulling..." + ) + await self._docker.pull_image(spec.tag) + logger.debug(f"[ImageCache] _pull_image: Successfully pulled image {spec.tag}") return spec.tag async def _build_image(self, spec: BuildImageSpec) -> ImageTag: @@ -226,19 +248,33 @@ async def _build_image(self, spec: BuildImageSpec) -> ImageTag: Returns: Image tag """ + logger.debug(f"[ImageCache] _build_image: Checking if image exists: {spec.tag}") + logger.debug(f"[ImageCache] _build_image: Build context path: {spec.context_path}") + logger.debug(f"[ImageCache] _build_image: Dockerfile path: {spec.dockerfile_path}") + logger.debug(f"[ImageCache] _build_image: Build args: {spec.build_args}") + # Check if image exists try: - await self._docker.images.inspect(spec.tag) + logger.debug(f"[ImageCache] _build_image: Attempting to inspect image {spec.tag}") + await self._docker.inspect_image(spec.tag) + logger.debug( + f"[ImageCache] _build_image: Image {spec.tag} already exists, skipping build" + ) return spec.tag - except DockerError: - pass # Image doesn't exist, build it + except 
DockerClientError as e: + logger.debug( + f"[ImageCache] _build_image: Image {spec.tag} not found (error: {e}), proceeding with build" + ) + # Image doesn't exist, build it + logger.debug(f"[ImageCache] _build_image: Starting build for image {spec.tag}") await self._build_from_context( context_path=spec.context_path, tag=spec.tag, dockerfile_path=spec.dockerfile_path, build_args=spec.build_args, ) + logger.debug(f"[ImageCache] _build_image: Successfully built image {spec.tag}") return spec.tag async def _build_multi_stage_image(self, spec: MultiStageBuildImageSpec) -> ImageTag: @@ -253,9 +289,9 @@ async def _build_multi_stage_image(self, spec: MultiStageBuildImageSpec) -> Imag for stage in spec.stages: # Check if stage image exists try: - await self._docker.images.inspect(stage.tag) + await self._docker.inspect_image(stage.tag) continue # Stage already built - except DockerError: + except DockerClientError: pass # Need to build this stage # Prepare build args @@ -284,6 +320,9 @@ async def _build_from_context( ) -> None: """Build a Docker image from a build context. + The tar archive creation and file transfer is handled by the + Docker client implementation. 
+ Args: context_path: Path to the build context directory tag: Tag for the built image @@ -293,30 +332,35 @@ async def _build_from_context( Raises: ImageBuildError: If build fails """ - # Create tar archive of build context - tar_stream = io.BytesIO() - with tarfile.open(fileobj=tar_stream, mode="w") as tar: - tar.add(context_path, arcname=".") - tar_stream.seek(0) + logger.debug( + f"[ImageCache] _build_from_context: Building image {tag} from context {context_path}" + ) + logger.debug(f"[ImageCache] _build_from_context: Dockerfile path: {dockerfile_path}") + logger.debug(f"[ImageCache] _build_from_context: Build args: {build_args}") errors = [] - # Stream build output - async for log_line in self._docker.images.build( - fileobj=tar_stream, + # Stream build output from client + async for log_line in self._docker.build_image( + context_path=context_path, tag=tag, - encoding="application/x-tar", - stream=True, - path_dockerfile=dockerfile_path or "Dockerfile", + dockerfile_path=dockerfile_path, buildargs=build_args, ): + logger.debug(f"[ImageCache] _build_from_context: Build log: {log_line}") if "error" in log_line: error = log_line["error"] + logger.error(f"[ImageCache] _build_from_context: Build error: {error}") errors.append(error) if errors: + logger.error( + f"[ImageCache] _build_from_context: Build failed with {len(errors)} errors" + ) raise ImageBuildError(tag, errors) + logger.debug(f"[ImageCache] _build_from_context: Build completed successfully for {tag}") + @staticmethod def _get_cache_key(spec: ImageSpec) -> str: """Generate a cache key for an image spec. 
diff --git a/src/prompt_siren/sandbox_managers/docker/local_client.py b/src/prompt_siren/sandbox_managers/docker/local_client.py new file mode 100644 index 0000000..bb3afac --- /dev/null +++ b/src/prompt_siren/sandbox_managers/docker/local_client.py @@ -0,0 +1,293 @@ +"""Local Docker client implementation using aiodocker.""" + +from __future__ import annotations + +import io +import tarfile +from collections.abc import AsyncIterator +from typing import Any + +try: + import aiodocker + from aiodocker.containers import DockerContainer + from aiodocker.exceptions import DockerError + from aiodocker.networks import DockerNetwork +except ImportError as e: + raise ImportError( + "The local Docker client requires the 'docker' optional dependency. " + "Install with: pip install 'prompt-siren[docker]'" + ) from e + +from aiohttp import ClientTimeout +from pydantic import BaseModel + +from .. import ExecOutput, StderrChunk, StdoutChunk +from .plugins import AbstractContainer, AbstractDockerClient, AbstractNetwork +from .plugins.errors import DockerClientError + + +class LocalDockerClientConfig(BaseModel): + """Configuration for local Docker client (no special config needed).""" + + +class LocalContainer(AbstractContainer): + """Local implementation of container using aiodocker.""" + + def __init__(self, container: DockerContainer): + self._container = container + + async def start(self) -> None: + """Start the container.""" + try: + await self._container.start() + except DockerError as e: + raise DockerClientError(f"Failed to start container: {e.message}") from e + + async def stop(self) -> None: + """Stop the container.""" + try: + await self._container.stop() + except DockerError as e: + raise DockerClientError(f"Failed to stop container: {e.message}") from e + + async def delete(self) -> None: + """Delete the container.""" + try: + await self._container.delete() + except DockerError as e: + raise DockerClientError(f"Failed to delete container: {e.message}") from e + + 
async def show(self) -> dict[str, Any]: + """Get container details.""" + try: + return await self._container.show() + except DockerError as e: + raise DockerClientError(f"Failed to get container details: {e.message}") from e + + async def exec( + self, + cmd: list[str], + stdin: str | bytes | None, + user: str, + environment: dict[str, str] | None, + workdir: str | None, + timeout: int, + ) -> ExecOutput: + """Execute a command in the container with streaming I/O. + + Args: + cmd: Command to execute (already in bash -c format) + stdin: Optional stdin data to pass to the command + user: User to run as + environment: Environment variables + workdir: Working directory + timeout: Timeout in seconds + + Returns: + ExecOutput containing stdout/stderr chunks and exit code + """ + try: + # Create exec instance + exec_instance = await self._container.exec( + cmd=cmd, + stdout=True, + stderr=True, + stdin=stdin is not None, + user=user, + environment=environment, + workdir=workdir, + ) + + # Start execution + stream = exec_instance.start(detach=False, timeout=ClientTimeout(total=timeout)) + + # Write stdin if provided + if stdin is not None: + if isinstance(stdin, str): + stdin_bytes = stdin.encode() + else: + stdin_bytes = stdin + await stream.write_in(stdin_bytes) + # Signal EOF on stdin without closing stdout/stderr + # Access transport directly like aiodocker's write_in() does + if stream._resp is not None and stream._resp.connection is not None: + transport = stream._resp.connection.transport + if transport and transport.can_write_eof(): + transport.write_eof() + + # Read output + outputs = [] + while True: + msg = await stream.read_out() + if msg is None: + break + decoded = msg.data.decode("utf-8", errors="replace") + if msg.stream == 1: # stdout + outputs.append(StdoutChunk(content=decoded)) + elif msg.stream == 2: # stderr + outputs.append(StderrChunk(content=decoded)) + + # Get exit code + exit_code = (await exec_instance.inspect())["ExitCode"] + + return 
ExecOutput(outputs=outputs, exit_code=exit_code) + except DockerError as e: + raise DockerClientError(f"Failed to execute command in container: {e.message}") from e + + async def log(self, stdout: bool, stderr: bool) -> list[str]: + """Get container logs.""" + try: + return await self._container.log(stdout=stdout, stderr=stderr) + except DockerError as e: + raise DockerClientError(f"Failed to get container logs: {e.message}") from e + + async def commit(self, repository: str, tag: str) -> None: + """Commit container to an image.""" + try: + await self._container.commit(repository=repository, tag=tag) + except DockerError as e: + raise DockerClientError( + f"Failed to commit container to {repository}:{tag}: {e.message}" + ) from e + + +class LocalNetwork(AbstractNetwork): + """Local implementation of network using aiodocker.""" + + def __init__(self, network: DockerNetwork): + self._network = network + + async def show(self) -> dict[str, Any]: + """Get network details.""" + try: + return await self._network.show() + except DockerError as e: + raise DockerClientError(f"Failed to get network details: {e.message}") from e + + async def delete(self) -> None: + """Delete the network.""" + try: + await self._network.delete() + except DockerError as e: + raise DockerClientError(f"Failed to delete network: {e.message}") from e + + +class LocalDockerClient(AbstractDockerClient): + """Local Docker client implementation using aiodocker. + + This implementation wraps aiodocker to provide Docker operations on the local machine. 
+ """ + + def __init__(self): + """Initialize local Docker client.""" + self._docker = aiodocker.Docker() + + async def close(self) -> None: + """Close the Docker client.""" + await self._docker.close() + + # Image operations + + async def inspect_image(self, tag: str) -> dict[str, Any]: + """Inspect an image.""" + try: + return await self._docker.images.inspect(tag) + except DockerError as e: + raise DockerClientError(f"Failed to inspect image {tag}: {e.message}") from e + + async def pull_image(self, tag: str) -> None: + """Pull an image from registry.""" + try: + await self._docker.images.pull(from_image=tag) + except DockerError as e: + raise DockerClientError(f"Failed to pull image {tag}: {e.message}") from e + + async def build_image( + self, + context_path: str, + tag: str, + dockerfile_path: str | None = None, + buildargs: dict[str, str] | None = None, + ) -> AsyncIterator[dict[str, Any]]: + """Build an image from a build context directory. + + Creates a tar archive of the context and sends it to Docker. 
+ """ + # Create tar archive of build context with normalized ownership + tar_stream = io.BytesIO() + + def reset_ownership(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo: + """Reset ownership to root to avoid user namespace issues.""" + tarinfo.uid = 0 + tarinfo.gid = 0 + tarinfo.uname = "root" + tarinfo.gname = "root" + return tarinfo + + with tarfile.open(fileobj=tar_stream, mode="w") as tar: + tar.add(context_path, arcname=".", filter=reset_ownership) + tar_stream.seek(0) + + # Build image from tar archive + try: + async for log_line in self._docker.images.build( + fileobj=tar_stream, + tag=tag, + encoding="application/x-tar", + stream=True, + path_dockerfile=dockerfile_path or "Dockerfile", + buildargs=buildargs, + ): + yield log_line + except DockerError as e: + raise DockerClientError(f"Failed to build image {tag}: {e.message}") from e + + async def delete_image(self, tag: str, force: bool = False) -> None: + """Delete an image.""" + try: + await self._docker.images.delete(tag, force=force) + except DockerError as e: + raise DockerClientError(f"Failed to delete image {tag}: {e.message}") from e + + # Container operations + + async def create_container(self, config: dict[str, Any], name: str) -> AbstractContainer: + """Create a container.""" + try: + container = await self._docker.containers.create(config, name=name) + return LocalContainer(container) + except DockerError as e: + raise DockerClientError(f"Failed to create container {name}: {e.message}") from e + + async def get_container(self, container_id: str) -> AbstractContainer: + """Get a container by ID.""" + try: + container = await self._docker.containers.get(container_id) + return LocalContainer(container) + except DockerError as e: + raise DockerClientError(f"Failed to get container {container_id}: {e.message}") from e + + # Network operations + + async def create_network(self, config: dict[str, Any]) -> AbstractNetwork: + """Create a network.""" + try: + network = await 
self._docker.networks.create(config=config) + return LocalNetwork(network) + except DockerError as e: + raise DockerClientError(f"Failed to create network: {e.message}") from e + + async def get_network(self, network_id: str) -> AbstractNetwork: + """Get a network by ID.""" + try: + network = await self._docker.networks.get(network_id) + return LocalNetwork(network) + except DockerError as e: + raise DockerClientError(f"Failed to get network {network_id}: {e.message}") from e + + +def create_local_docker_client( + config: LocalDockerClientConfig, context: None = None +) -> LocalDockerClient: + """Factory function to create a LocalDockerClient instance.""" + return LocalDockerClient() diff --git a/src/prompt_siren/sandbox_managers/docker/manager.py b/src/prompt_siren/sandbox_managers/docker/manager.py index cef5758..633d8c3 100644 --- a/src/prompt_siren/sandbox_managers/docker/manager.py +++ b/src/prompt_siren/sandbox_managers/docker/manager.py @@ -3,27 +3,52 @@ from __future__ import annotations +import logging import uuid from collections.abc import AsyncIterator, Sequence from contextlib import asynccontextmanager from pathlib import Path +from typing import Any from pydantic import BaseModel, Field -try: - import aiodocker -except ImportError as e: - raise ImportError( - "Docker sandbox manager requires the 'docker' optional dependency. 
" - "Install with: pip install 'prompt-siren[docker]'" - ) from e - from ..abstract import ExecOutput from ..sandbox_state import ContainerID, SandboxState from ..sandbox_task_setup import ContainerSetup, SandboxTaskSetup from .contexts import BatchState, TaskSandboxContext from .exec_utils import exec_in_container from .image_cache import ImageCache +from .plugins import ( + AbstractDockerClient, + create_docker_client, + get_docker_client_config_class, +) + +logger = logging.getLogger(__name__) + + +def create_docker_client_from_config(client_type: str, config: dict) -> AbstractDockerClient: + """Create a Docker client instance from configuration. + + Follows the same pattern as registry_bridge.py: + 1. Look up the config class from the registry + 2. Validate the config dict against the Pydantic model + 3. Create the component using the factory + + Args: + client_type: The Docker client type (e.g., "local") + config: Configuration dictionary for the client + + Returns: + Configured Docker client instance + + Raises: + RuntimeError: If client type is not registered + ValidationError: If configuration is invalid + """ + config_class = get_docker_client_config_class(client_type) + validated_config = config_class.model_validate(config) + return create_docker_client(client_type, validated_config) class DockerSandboxConfig(BaseModel): @@ -34,6 +59,14 @@ class DockerSandboxConfig(BaseModel): description="Whether to enable network access in containers", ) batch_id_prefix: str = "workbench" + docker_client: str = Field( + default="local", + description="Name of the Docker client plugin to use", + ) + docker_client_config: dict[str, Any] = Field( + default_factory=dict, + description="Configuration for the Docker client plugin", + ) class DockerSandboxManager: @@ -55,19 +88,17 @@ def __init__(self, config: DockerSandboxConfig): """Initialize Docker sandbox manager. 
Args: - batch_id_prefix: Prefix for batch ID generation - network_enabled: Whether networking is enabled for containers + config: Docker sandbox configuration """ - self._batch_id_prefix = config.batch_id_prefix - self._network_enabled = config.network_enabled + self._config = config self._batch_state: BatchState | None = None @asynccontextmanager async def setup_batch(self, task_setups: Sequence[SandboxTaskSetup]) -> AsyncIterator[None]: """Prepare all images and resources for the batch. - Creates Docker client, builds/pulls all images sequentially, - and tracks all task contexts for cleanup. + Creates Docker client based on config, builds/pulls all images + sequentially, and tracks all task contexts for cleanup. Args: task_setups: All task setups for this batch @@ -75,17 +106,29 @@ async def setup_batch(self, task_setups: Sequence[SandboxTaskSetup]) -> AsyncIte Yields: Control for task execution """ + logger.debug( + f"[DockerSandboxManager] setup_batch: Starting with {len(task_setups)} task setups" + ) # Generate unique batch ID - batch_id = f"{self._batch_id_prefix}-{uuid.uuid4().hex[:8]}" + batch_id = f"{self._config.batch_id_prefix}-{uuid.uuid4().hex[:8]}" + logger.debug(f"[DockerSandboxManager] setup_batch: Generated batch_id: {batch_id}") - # Create Docker client - docker_client = aiodocker.Docker() + # Create appropriate Docker client based on configuration + logger.debug( + f"[DockerSandboxManager] setup_batch: Creating Docker client '{self._config.docker_client}'" + ) + docker_client = create_docker_client_from_config( + self._config.docker_client, + self._config.docker_client_config, + ) try: # Create image cache + logger.debug("[DockerSandboxManager] setup_batch: Creating image cache") image_cache = ImageCache(docker_client, batch_id) # Create batch state + logger.debug("[DockerSandboxManager] setup_batch: Creating batch state") self._batch_state = BatchState( batch_id=batch_id, docker_client=docker_client, @@ -99,11 +142,25 @@ async def 
setup_batch(self, task_setups: Sequence[SandboxTaskSetup]) -> AsyncIte all_container_setups.append(task_setup.agent_container) all_container_setups.extend(task_setup.service_containers.values()) + logger.debug( + f"[DockerSandboxManager] setup_batch: Collected {len(all_container_setups)} container setups" + ) + for idx, setup in enumerate(all_container_setups): + logger.debug( + f"[DockerSandboxManager] setup_batch: Container setup {idx}: name='{setup.name}', image_spec={type(setup.spec.image_spec).__name__}" + ) + # Build/pull all base images sequentially + logger.debug("[DockerSandboxManager] setup_batch: Calling ensure_all_base_images") await image_cache.ensure_all_base_images(all_container_setups) + logger.debug("[DockerSandboxManager] setup_batch: ensure_all_base_images completed") # Yield control for task execution + logger.debug("[DockerSandboxManager] setup_batch: Yielding control for task execution") yield + logger.debug( + "[DockerSandboxManager] setup_batch: Task execution completed, starting cleanup" + ) finally: # Cleanup all task contexts @@ -111,12 +168,17 @@ async def setup_batch(self, task_setups: Sequence[SandboxTaskSetup]) -> AsyncIte async with self._batch_state._lock: contexts = list(self._batch_state.contexts.values()) + logger.debug( + f"[DockerSandboxManager] setup_batch: Cleaning up {len(contexts)} task contexts" + ) for context in contexts: await context.cleanup() # Close Docker client + logger.debug("[DockerSandboxManager] setup_batch: Closing Docker client") await docker_client.close() self._batch_state = None + logger.debug("[DockerSandboxManager] setup_batch: Cleanup completed") @asynccontextmanager async def setup_task(self, task_setup: SandboxTaskSetup) -> AsyncIterator[SandboxState]: @@ -151,7 +213,7 @@ async def setup_task(self, task_setup: SandboxTaskSetup) -> AsyncIterator[Sandbo try: # Create containers and network sandbox_state = await context.create_containers( - task_setup, network_enabled=self._network_enabled + task_setup, 
network_enabled=self._config.network_enabled ) # Yield sandbox state diff --git a/src/prompt_siren/sandbox_managers/docker/plugins/__init__.py b/src/prompt_siren/sandbox_managers/docker/plugins/__init__.py new file mode 100644 index 0000000..8185d7a --- /dev/null +++ b/src/prompt_siren/sandbox_managers/docker/plugins/__init__.py @@ -0,0 +1,21 @@ +"""Docker client plugins.""" + +from .abstract import AbstractContainer, AbstractDockerClient, AbstractNetwork +from .errors import DockerClientError +from .registry import ( + create_docker_client, + get_docker_client_config_class, + get_registered_docker_clients, + register_docker_client, +) + +__all__ = [ + "AbstractContainer", + "AbstractDockerClient", + "AbstractNetwork", + "DockerClientError", + "create_docker_client", + "get_docker_client_config_class", + "get_registered_docker_clients", + "register_docker_client", +] diff --git a/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py b/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py new file mode 100644 index 0000000..1c3cc7b --- /dev/null +++ b/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py @@ -0,0 +1,235 @@ +"""Abstract Docker client interface for supporting multiple execution backends. + +This module provides abstractions for Docker operations that can be implemented +by different backends. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from collections.abc import AsyncIterator +from typing import Any + +from ...abstract import ExecOutput + + +class AbstractContainer(ABC): + """Abstract interface for Docker containers.""" + + @abstractmethod + async def start(self) -> None: + """Start the container.""" + ... + + @abstractmethod + async def stop(self) -> None: + """Stop the container.""" + ... + + @abstractmethod + async def delete(self) -> None: + """Delete the container.""" + ... + + @abstractmethod + async def show(self) -> dict[str, Any]: + """Get container details. 
+ + Returns: + Dict with container information including Id, State, Config, HostConfig + """ + ... + + @abstractmethod + async def exec( + self, + cmd: list[str], + stdin: str | bytes | None, + user: str, + environment: dict[str, str] | None, + workdir: str | None, + timeout: int, + ) -> ExecOutput: + """Execute a command in the container. + + Args: + cmd: Command to execute (already in bash -c format) + stdin: Optional stdin data to pass to the command + user: User to run as + environment: Environment variables + workdir: Working directory + timeout: Timeout in seconds + + Returns: + ExecOutput containing stdout/stderr chunks and exit code + """ + ... + + @abstractmethod + async def log(self, stdout: bool, stderr: bool) -> list[str]: + """Get container logs. + + Args: + stdout: Include stdout + stderr: Include stderr + + Returns: + List of log lines + """ + ... + + @abstractmethod + async def commit(self, repository: str, tag: str) -> None: + """Commit container to an image. + + Args: + repository: Repository name + tag: Image tag + """ + ... + + +class AbstractNetwork(ABC): + """Abstract interface for Docker networks.""" + + @abstractmethod + async def show(self) -> dict[str, Any]: + """Get network details. + + Returns: + Dict with network information including Id, Driver, Internal + """ + ... + + @abstractmethod + async def delete(self) -> None: + """Delete the network.""" + ... + + +class AbstractDockerClient(ABC): + """Abstract Docker client interface. + + This interface provides all Docker operations needed by the workbench, + abstracting away the underlying implementation. + """ + + @abstractmethod + async def close(self) -> None: + """Close the client and clean up resources.""" + ... + + # Image operations + + @abstractmethod + async def inspect_image(self, tag: str) -> dict[str, Any]: + """Inspect an image. + + Args: + tag: Image tag + + Returns: + Image details + + Raises: + DockerClientError: If image doesn't exist + """ + ... 
+ + @abstractmethod + async def pull_image(self, tag: str) -> None: + """Pull an image from registry. + + Args: + tag: Image tag to pull + """ + ... + + @abstractmethod + async def build_image( + self, + context_path: str, + tag: str, + dockerfile_path: str | None = None, + buildargs: dict[str, str] | None = None, + ) -> AsyncIterator[dict[str, Any]]: + """Build an image from a build context directory. + + The implementation handles creating tar archives and transferring files + as needed for the specific backend (local or remote). + + Args: + context_path: Path to the build context directory + tag: Tag for built image + dockerfile_path: Path to Dockerfile relative to context (defaults to "Dockerfile") + buildargs: Build arguments + + Yields: + Build log entries (dicts with "stream", "error", etc.) + """ + # This is an abstract async generator - implementations must yield log entries + yield {} # type: ignore[unreachable] + ... + + @abstractmethod + async def delete_image(self, tag: str, force: bool = False) -> None: + """Delete an image. + + Args: + tag: Image tag to delete + force: Force deletion + """ + ... + + # Container operations + + @abstractmethod + async def create_container(self, config: dict[str, Any], name: str) -> AbstractContainer: + """Create a container. + + Args: + config: Container configuration + name: Container name + + Returns: + Container instance + """ + ... + + @abstractmethod + async def get_container(self, container_id: str) -> AbstractContainer: + """Get a container by ID. + + Args: + container_id: Container ID + + Returns: + Container instance + """ + ... + + # Network operations + + @abstractmethod + async def create_network(self, config: dict[str, Any]) -> AbstractNetwork: + """Create a network. + + Args: + config: Network configuration + + Returns: + Network instance + """ + ... + + @abstractmethod + async def get_network(self, network_id: str) -> AbstractNetwork: + """Get a network by ID. 
+
+        Args:
+            network_id: Network ID
+
+        Returns:
+            Network instance
+        """
+        ...
diff --git a/src/prompt_siren/sandbox_managers/docker/plugins/errors.py b/src/prompt_siren/sandbox_managers/docker/plugins/errors.py
new file mode 100644
index 0000000..33d4481
--- /dev/null
+++ b/src/prompt_siren/sandbox_managers/docker/plugins/errors.py
@@ -0,0 +1,16 @@
+"""Common Docker client exceptions."""
+
+from __future__ import annotations
+
+
+class DockerClientError(Exception):
+    """Error executing Docker client command.
+
+    This exception is raised by any Docker client implementation when a Docker
+    operation fails.
+    """
+
+    def __init__(self, message: str, stdout: str = "", stderr: str = ""):
+        self.stdout = stdout
+        self.stderr = stderr
+        super().__init__(f"{message}\nstdout: {stdout}\nstderr: {stderr}")
diff --git a/src/prompt_siren/sandbox_managers/docker/plugins/registry.py b/src/prompt_siren/sandbox_managers/docker/plugins/registry.py
new file mode 100644
index 0000000..47d061c
--- /dev/null
+++ b/src/prompt_siren/sandbox_managers/docker/plugins/registry.py
@@ -0,0 +1,52 @@
+"""Docker client registry for managing Docker client plugins.
+
+This module provides a registry system for Docker client implementations,
+allowing different backends to be registered and discovered via entry points.
+"""
+
+from collections.abc import Callable
+from typing import TypeAlias, TypeVar
+
+from pydantic import BaseModel
+
+from ....registry_base import BaseRegistry
+from .abstract import AbstractDockerClient
+
+ConfigT = TypeVar("ConfigT", bound=BaseModel)
+
+# Type alias for Docker client factory functions that accept optional kwargs
+DockerClientFactory: TypeAlias = Callable[[ConfigT], AbstractDockerClient]
+
+# Create a global Docker client registry instance using BaseRegistry
+docker_client_registry = BaseRegistry[AbstractDockerClient, None](
+    "docker_client", "prompt_siren.docker_clients"
+)
+
+
+def register_docker_client(
+    client_name: str, config_class: type[ConfigT], factory: DockerClientFactory
+) -> None:
+    """Register a Docker client with its factory function."""
+    docker_client_registry.register(client_name, config_class=config_class, factory=factory)
+
+
+def get_docker_client_config_class(client_name: str) -> type[BaseModel]:
+    """Get the configuration class for a Docker client type."""
+    config_class = docker_client_registry.get_config_class(client_name)
+    if config_class is None:
+        raise RuntimeError(f"Docker client type '{client_name}' must have a config class")
+    return config_class
+
+
+def create_docker_client(client_name: str, config: BaseModel) -> AbstractDockerClient:
+    """Create a Docker client instance from a configuration."""
+    return docker_client_registry.create_component(client_name, config)
+
+
+def get_registered_docker_clients() -> list[str]:
+    """Get list of all registered Docker client names.
+
+    Returns:
+        List of registered client names
+    """
+    return docker_client_registry.get_registered_components()
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ee64840..75a0a25 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,10 +1,35 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-"""Pytest fixtures for Docker integration tests.""" +"""Pytest fixtures for Docker integration tests. + +External tests use local Docker client only. +""" from collections.abc import AsyncIterator +from typing import Protocol import pytest -from aiodocker import Docker +from prompt_siren.sandbox_managers.docker.manager import ( + create_docker_client_from_config, + DockerSandboxConfig, +) +from prompt_siren.sandbox_managers.docker.plugins import AbstractDockerClient +from prompt_siren.sandbox_managers.docker.plugins.errors import DockerClientError + + +class ManagerConfigFactory(Protocol): + """Protocol for create_manager_config fixture callable. + + This properly types the callable with optional parameters, unlike + Callable[[str, bool, str | list[str] | None], DockerSandboxConfig] + which requires all arguments to be positional. + """ + + def __call__( + self, + docker_client_type: str, + network_enabled: bool = False, + test_images: str | list[str] | None = None, + ) -> DockerSandboxConfig: ... @pytest.fixture(scope="module") @@ -14,30 +39,114 @@ def anyio_backend() -> str: @pytest.fixture(scope="module") -async def docker_client() -> AsyncIterator[Docker]: +def docker_client_type() -> str: + """Docker client type for external tests. + + External tests always use local Docker client. + """ + return "local" + + +@pytest.fixture(scope="module") +def skip_if_des_unavailable(docker_client_type: str): + """Skip test if requested Docker client is unavailable. + + This is a no-op for external tests (always local). + Kept for compatibility with shared test code. + """ + + +@pytest.fixture(scope="module") +def create_manager_config() -> ManagerConfigFactory: + """Factory fixture for creating DockerSandboxConfig. + + External version only supports local Docker. 
+ + Returns: + A callable that creates DockerSandboxConfig instances + """ + + def _create_config( + docker_client_type: str, + network_enabled: bool = False, + test_images: str | list[str] | None = None, + ) -> DockerSandboxConfig: + """Create DockerSandboxConfig for external tests (local only). + + Args: + docker_client_type: Client type ('local' for external) + network_enabled: Whether to enable network access + test_images: Test image(s) - unused in external tests + + Returns: + DockerSandboxConfig configured for local Docker + + Raises: + ValueError: If docker_client_type is not 'local' (in external) + """ + if docker_client_type != "local": + raise ValueError( + f"External tests only support 'local' client type. Got: {docker_client_type}" + ) + + return DockerSandboxConfig(network_enabled=network_enabled, docker_client="local") + + return _create_config + + +@pytest.fixture(scope="module") +async def docker_client() -> AsyncIterator[AbstractDockerClient]: """Provide a shared Docker client for all integration tests in the module. Module-scoped to avoid creating a new connection for each test. + External tests always use local Docker client plugin via registry. """ - docker = Docker() + client = create_docker_client_from_config("local", {}) try: - yield docker + yield client finally: - await docker.close() + await client.close() @pytest.fixture(scope="module") -async def test_image(docker_client: Docker): +async def test_image(docker_client: AbstractDockerClient): """Pull the test image once for all tests in the module. Uses debian:bookworm-slim for small size with bash support. Module-scoped to avoid repeated image checks. 
""" image = "debian:bookworm-slim" + # Check if image exists locally before attempting pull (avoids rate limits) try: - await docker_client.images.inspect(image) - except Exception: + await docker_client.inspect_image(image) + except DockerClientError: # Image doesn't exist, pull it - await docker_client.images.pull(from_image=image) + await docker_client.pull_image(image) return image + + +async def _ensure_image_available(docker_client: AbstractDockerClient, image: str) -> None: + """Ensure an image is available locally, pulling if necessary.""" + try: + await docker_client.inspect_image(image) + except DockerClientError: + # Image doesn't exist, pull it + await docker_client.pull_image(image) + + +@pytest.fixture(scope="module") +async def multistage_test_images(docker_client: AbstractDockerClient): + """Pull the images needed for multi-stage build tests. + + Returns a list of images needed for multi-stage builds. + Module-scoped to avoid repeated image checks. + """ + images = [ + "debian:bookworm-slim", + "alpine:3.19", + ] + + for image in images: + await _ensure_image_available(docker_client, image) + return images diff --git a/tests/integration/test_bash_env.py b/tests/integration/test_bash_env.py index 7854cfa..e730a50 100644 --- a/tests/integration/test_bash_env.py +++ b/tests/integration/test_bash_env.py @@ -94,7 +94,7 @@ async def test_create_env_state_for_single_container_task( # Verify container is actually running assert bash_env._sandbox_manager._batch_state is not None docker = bash_env._sandbox_manager._batch_state.docker_client - container = await docker.containers.get(env_state.agent_container_id) + container = await docker.get_container(env_state.agent_container_id) container_info = await container.show() assert container_info["State"]["Running"] is True @@ -133,8 +133,8 @@ async def test_clone_single_container_env_state( # Verify both containers are running assert bash_env._sandbox_manager._batch_state is not None docker = 
bash_env._sandbox_manager._batch_state.docker_client - original_container = await docker.containers.get(original_container_id) - cloned_container = await docker.containers.get(cloned_env_state.agent_container_id) + original_container = await docker.get_container(original_container_id) + cloned_container = await docker.get_container(cloned_env_state.agent_container_id) original_info = await original_container.show() cloned_info = await cloned_container.show() @@ -197,8 +197,8 @@ async def test_create_env_state_for_task_couple( assert bash_env._sandbox_manager._batch_state is not None docker = bash_env._sandbox_manager._batch_state.docker_client - benign_container = await docker.containers.get(env_state.agent_container_id) - attack_container = await docker.containers.get(attack_container_id) + benign_container = await docker.get_container(env_state.agent_container_id) + attack_container = await docker.get_container(attack_container_id) benign_info = await benign_container.show() attack_info = await attack_container.show() @@ -285,8 +285,8 @@ async def test_clone_multi_container_env_state( assert bash_env._sandbox_manager._batch_state is not None docker = bash_env._sandbox_manager._batch_state.docker_client - cloned_benign = await docker.containers.get(cloned_env_state.agent_container_id) - cloned_attack = await docker.containers.get(cloned_attack_id) + cloned_benign = await docker.get_container(cloned_env_state.agent_container_id) + cloned_attack = await docker.get_container(cloned_attack_id) cloned_benign_info = await cloned_benign.show() cloned_attack_info = await cloned_attack.show() @@ -356,7 +356,7 @@ async def test_build_modified_image( assert bash_env._sandbox_manager._batch_state is not None docker = bash_env._sandbox_manager._batch_state.docker_client - agent_container = await docker.containers.get(env_state.agent_container_id) + agent_container = await docker.get_container(env_state.agent_container_id) container_info = await agent_container.show() 
image_name = container_info["Config"]["Image"] @@ -419,14 +419,14 @@ async def test_multiple_service_containers(self, sandbox_manager: DockerSandboxM assert bash_env._sandbox_manager._batch_state is not None docker = bash_env._sandbox_manager._batch_state.docker_client - agent_container = await docker.containers.get(env_state.agent_container_id) - attack_container = await docker.containers.get( + agent_container = await docker.get_container(env_state.agent_container_id) + attack_container = await docker.get_container( env_state.sandbox_state.service_containers["attack_server"] ) - db_container = await docker.containers.get( + db_container = await docker.get_container( env_state.sandbox_state.service_containers["database"] ) - cache_container = await docker.containers.get( + cache_container = await docker.get_container( env_state.sandbox_state.service_containers["cache"] ) @@ -633,7 +633,7 @@ async def test_http_request_to_service_container( assert bash_env._sandbox_manager._batch_state is not None docker = bash_env._sandbox_manager._batch_state.docker_client service_id = env_state.sandbox_state.service_containers["http_server"] - service_container = await docker.containers.get(service_id) + service_container = await docker.get_container(service_id) service_info = await service_container.show() # If container crashed, get logs to understand why diff --git a/tests/integration/test_docker_manager_integration.py b/tests/integration/test_docker_manager_integration.py index 3c0f3c7..2d5ed25 100644 --- a/tests/integration/test_docker_manager_integration.py +++ b/tests/integration/test_docker_manager_integration.py @@ -8,21 +8,18 @@ Skip with: pytest -vx -m "not docker_integration" """ -from __future__ import annotations - import asyncio -import os from collections.abc import AsyncIterator from pathlib import Path from uuid import uuid4 import pytest -from aiodocker import Docker from prompt_siren.sandbox_managers.abstract import AbstractSandboxManager +from 
prompt_siren.sandbox_managers.docker.local_client import LocalDockerClient from prompt_siren.sandbox_managers.docker.manager import ( - DockerSandboxConfig, DockerSandboxManager, ) +from prompt_siren.sandbox_managers.docker.plugins import AbstractDockerClient from prompt_siren.sandbox_managers.image_spec import ( BuildImageSpec, BuildStage, @@ -39,6 +36,9 @@ pytestmark = pytest.mark.anyio +# Path to test fixtures +FIXTURES_DIR = Path(__file__).parent / "fixtures" + # ==================== Shared Fixtures for Container Reuse ==================== @@ -46,12 +46,17 @@ @pytest.fixture(scope="module") async def basic_sandbox_manager( test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, ) -> AsyncIterator[tuple[AbstractSandboxManager, TaskSetup]]: """Create a sandbox manager with batch context for basic tests. Module-scoped to reuse across tests for performance. """ - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) @@ -219,15 +224,22 @@ async def test_exec_with_env_vars( class TestMultiContainerNetworking: """Tests for multi-container setups with networking.""" - async def test_multi_container_dns_resolution_and_communication(self, test_image: str): + async def test_multi_container_dns_resolution_and_communication( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): """Test that containers on the same network can resolve each other by hostname.""" - config = DockerSandboxConfig(network_enabled=True) + config = create_manager_config( + docker_client_type, network_enabled=True, test_images=test_image + ) manager = DockerSandboxManager(config) # Use custom test image with netcat pre-installed - fixtures_dir = os.path.join(os.path.dirname(__file__), 
"fixtures") network_image_spec = BuildImageSpec( - context_path=fixtures_dir, + context_path=str(FIXTURES_DIR), dockerfile_path="Dockerfile.network", tag="prompt-siren-network-test:latest", ) @@ -287,9 +299,18 @@ async def test_multi_container_dns_resolution_and_communication(self, test_image assert result.stdout is not None assert "test-message" in result.stdout - async def test_network_disabled_creates_internal_network(self, test_image: str): + async def test_network_disabled_creates_internal_network( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test that network_enabled=False creates internal-only network for multi-container.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec( @@ -312,13 +333,11 @@ async def test_network_disabled_creates_internal_network(self, test_image: str): assert sandbox_state.network_id is not None # Verify network is internal by inspecting it - docker = Docker() - try: - network = await docker.networks.get(sandbox_state.network_id) + if docker_client_type == "local": + assert docker_client is not None + network = await docker_client.get_network(sandbox_state.network_id) network_info = await network.show() assert network_info["Internal"] is True - finally: - await docker.close() # ==================== Container Cloning Tests ==================== @@ -328,9 +347,17 @@ async def test_network_disabled_creates_internal_network(self, test_image: str): class TestContainerCloning: """Tests for container cloning functionality.""" - async def test_clone_single_container(self, test_image: str): + async def test_clone_single_container( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): """Test cloning 
a single container creates snapshot.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) @@ -380,9 +407,18 @@ async def test_clone_single_container(self, test_image: str): assert "source content" in result.stdout assert "modified" not in result.stdout - async def test_clone_container_with_custom_command(self, test_image: str): + async def test_clone_container_with_custom_command( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test cloning a container with custom command preserves and runs the command.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) # Create container with custom command that keeps running @@ -403,60 +439,73 @@ async def test_clone_container_with_custom_command(self, test_image: str): network_config=None, ) - docker = Docker() - try: - async with manager.setup_batch([task_setup]): - async with manager.setup_task(task_setup) as source_state: - source_id = source_state.agent_container_id + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as source_state: + source_id = source_state.agent_container_id + # Only inspect Docker API for local docker + if docker_client_type == "local": + assert docker_client is not None # Verify source container's command via Docker API - source_container = await docker.containers.get(source_id) + source_container = await docker_client.get_container(source_id) source_info = await source_container.show() source_cmd = source_info["Config"]["Cmd"] assert source_cmd == custom_command - # Verify command is 
actually running in source container - # Use /proc filesystem which is available in all Linux containers - await asyncio.sleep(0.5) # Give process time to start - cmdline_result = await manager.exec( - source_id, - ["sh", "-c", "cat /proc/*/cmdline | tr '\\0' '\\n'"], - ) - assert cmdline_result.exit_code == 0 - assert cmdline_result.stdout is not None - assert "custom-process-marker" in cmdline_result.stdout - - # Clone the container - cloned_state = await manager.clone_sandbox_state(source_state) - cloned_id = cloned_state.agent_container_id + # Verify command is actually running in source container + # Use /proc filesystem which is available in all Linux containers + await asyncio.sleep(0.5) # Give process time to start + cmdline_result = await manager.exec( + source_id, + ["sh", "-c", "cat /proc/*/cmdline | tr '\\0' '\\n'"], + ) + assert cmdline_result.exit_code == 0 + assert cmdline_result.stdout is not None + assert "custom-process-marker" in cmdline_result.stdout - # Verify clone has different container ID - assert cloned_id != source_id + # Clone the container + cloned_state = await manager.clone_sandbox_state(source_state) + cloned_id = cloned_state.agent_container_id + + # Verify clone has different container ID + assert cloned_id != source_id + # Only inspect Docker API for local docker + if docker_client_type == "local": + assert docker_client is not None # Verify cloned container's command via Docker API - cloned_container = await docker.containers.get(cloned_id) + cloned_container = await docker_client.get_container(cloned_id) cloned_info = await cloned_container.show() cloned_cmd = cloned_info["Config"]["Cmd"] assert cloned_cmd == custom_command - # Verify both containers are running - assert source_info["State"]["Running"] is True + # Re-fetch source_info to verify both containers are running + source_container = await docker_client.get_container(source_id) + source_info_check = await source_container.show() + assert 
source_info_check["State"]["Running"] is True assert cloned_info["State"]["Running"] is True - # Verify command is actually running in cloned container - cloned_cmdline_result = await manager.exec( - cloned_id, - ["sh", "-c", "cat /proc/*/cmdline | tr '\\0' '\\n'"], - ) - assert cloned_cmdline_result.exit_code == 0 - assert cloned_cmdline_result.stdout is not None - assert "custom-process-marker" in cloned_cmdline_result.stdout - finally: - await docker.close() - - async def test_clone_multi_container_with_network(self, test_image: str): + # Verify command is actually running in cloned container + cloned_cmdline_result = await manager.exec( + cloned_id, + ["sh", "-c", "cat /proc/*/cmdline | tr '\\0' '\\n'"], + ) + assert cloned_cmdline_result.exit_code == 0 + assert cloned_cmdline_result.stdout is not None + assert "custom-process-marker" in cloned_cmdline_result.stdout + + async def test_clone_multi_container_with_network( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test cloning multi-container setup clones network too.""" - config = DockerSandboxConfig(network_enabled=True) + config = create_manager_config( + docker_client_type, network_enabled=True, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec( @@ -490,19 +539,26 @@ async def test_clone_multi_container_with_network(self, test_image: str): assert cloned_state.network_id is not None # Verify cloned containers can communicate on new network - # (Use Docker API to verify network attachment) - docker = Docker() - try: - clone_agent = await docker.containers.get(cloned_state.agent_container_id) + # (Use Docker API to verify network attachment - only for local) + if docker_client_type == "local": + assert docker_client is not None + clone_agent = await docker_client.get_container(cloned_state.agent_container_id) clone_info = await clone_agent.show() 
networks = clone_info["NetworkSettings"]["Networks"] assert len(networks) > 0 - finally: - await docker.close() - async def test_clone_cleanup_removes_temp_images(self, test_image: str): + async def test_clone_cleanup_removes_temp_images( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test that cloning cleanup removes temporary images.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) @@ -514,24 +570,31 @@ async def test_clone_cleanup_removes_temp_images(self, test_image: str): network_config=None, ) - docker = Docker() - try: - async with manager.setup_batch([task_setup]): - async with manager.setup_task(task_setup) as source_state: - # Clone multiple times - cloned_states = [] - for _ in range(3): - cloned_state = await manager.clone_sandbox_state(source_state) - cloned_states.append(cloned_state) + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as source_state: + # Clone multiple times + cloned_states = [] + for _ in range(3): + cloned_state = await manager.clone_sandbox_state(source_state) + cloned_states.append(cloned_state) + # Only verify temp images for local docker + if docker_client_type == "local": + assert docker_client is not None # Verify temp images exist - images = await docker.images.list() + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] temp_images_count = sum(1 for tag in image_tags if tag and "temp-clone-" in tag) assert temp_images_count >= 3 - # After task cleanup, temp images should be gone - images = await 
docker.images.list() + # After task cleanup, temp images should be gone (only check for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] # Filter for temp images from this specific execution @@ -540,8 +603,6 @@ async def test_clone_cleanup_removes_temp_images(self, test_image: str): # Verify this specific clone's temp image is gone temp_image_pattern = f"temp-clone-{cloned_state.execution_id}" assert not any(temp_image_pattern in (tag or "") for tag in image_tags) - finally: - await docker.close() # ==================== Concurrent Execution Tests ==================== @@ -551,9 +612,17 @@ async def test_clone_cleanup_removes_temp_images(self, test_image: str): class TestConcurrentExecution: """Tests for concurrent task execution.""" - async def test_parallel_tasks_with_same_task_id(self, test_image: str): + async def test_parallel_tasks_with_same_task_id( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): """Test that parallel tasks with same task_id are independent.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) @@ -593,9 +662,17 @@ async def run_independent_task(task_num: int) -> str: assert len(container_ids) == 5 assert len(set(container_ids)) == 5 - async def test_concurrent_cloning(self, test_image: str): + async def test_concurrent_cloning( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): """Test that concurrent cloning operations are safe.""" - config = 
DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) @@ -636,13 +713,22 @@ async def test_concurrent_cloning(self, test_image: str): class TestImageBuilding: """Tests for building Docker images from Dockerfiles.""" - async def test_build_image_from_dockerfile(self, test_image: str): + async def test_build_image_from_dockerfile( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test building an image from a Dockerfile using BuildImageSpec.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) build_spec = BuildImageSpec( - context_path="tests/integration/fixtures", + context_path=str(FIXTURES_DIR), tag="prompt-siren-test-build:latest", ) @@ -655,44 +741,66 @@ async def test_build_image_from_dockerfile(self, test_image: str): network_config=None, ) - docker = Docker() - try: - # Cleanup any existing test image + # Cleanup any existing test image (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) try: - await docker.images.delete("prompt-siren-test-build:latest", force=True) + await local_client._docker.images.delete( + "prompt-siren-test-build:latest", force=True + ) except Exception: pass - async with manager.setup_batch([task_setup]): - # Verify image was built - images = await docker.images.list() + async with manager.setup_batch([task_setup]): + # Verify image was built (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client 
+ assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] assert any("prompt-siren-test-build:latest" in (tag or "") for tag in image_tags) - # Create container and verify build marker - async with manager.setup_task(task_setup) as sandbox_state: - result = await manager.exec( - sandbox_state.agent_container_id, - ["cat", "/test-marker.txt"], - ) - assert result.exit_code == 0 - assert result.stdout is not None - assert "Test build successful" in result.stdout - finally: - # Cleanup + # Create container and verify build marker + async with manager.setup_task(task_setup) as sandbox_state: + result = await manager.exec( + sandbox_state.agent_container_id, + ["cat", "/test-marker.txt"], + ) + assert result.exit_code == 0 + assert result.stdout is not None + assert "Test build successful" in result.stdout + + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) try: - await docker.images.delete("prompt-siren-test-build:latest", force=True) + await local_client._docker.images.delete( + "prompt-siren-test-build:latest", force=True + ) except Exception: pass - await docker.close() - async def test_build_with_build_args(self): + async def test_build_with_build_args( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test building an image with build_args and custom dockerfile_path.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) build_spec = BuildImageSpec( - context_path="tests/integration/fixtures", + context_path=str(FIXTURES_DIR), dockerfile_path="Dockerfile.dev", 
tag="prompt-siren-test-build-args:latest", build_args={"TEST_ARG": "custom_value"}, @@ -707,40 +815,58 @@ async def test_build_with_build_args(self): network_config=None, ) - docker = Docker() - try: - # Cleanup any existing test image + # Cleanup any existing test image (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) try: - await docker.images.delete("prompt-siren-test-build-args:latest", force=True) + await local_client._docker.images.delete( + "prompt-siren-test-build-args:latest", force=True + ) except Exception: pass - async with manager.setup_batch([task_setup]): - async with manager.setup_task(task_setup) as sandbox_state: - # Verify build arg was used - result = await manager.exec( - sandbox_state.agent_container_id, - ["cat", "/build-arg-test.txt"], - ) - assert result.exit_code == 0 - assert result.stdout is not None - assert "custom_value" in result.stdout - finally: - # Cleanup + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as sandbox_state: + # Verify build arg was used + result = await manager.exec( + sandbox_state.agent_container_id, + ["cat", "/build-arg-test.txt"], + ) + assert result.exit_code == 0 + assert result.stdout is not None + assert "custom_value" in result.stdout + + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) try: - await docker.images.delete("prompt-siren-test-build-args:latest", force=True) + await local_client._docker.images.delete( + "prompt-siren-test-build-args:latest", force=True + ) except Exception: pass - await docker.close() - async def test_mixed_pull_and_build_specs(self, test_image: str): + async def test_mixed_pull_and_build_specs( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + 
docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test using both PullImageSpec and BuildImageSpec in the same batch.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) pull_spec = PullImageSpec(tag=test_image) build_spec = BuildImageSpec( - context_path="tests/integration/fixtures", + context_path=str(FIXTURES_DIR), tag="prompt-siren-test-mixed:latest", ) @@ -762,38 +888,47 @@ async def test_mixed_pull_and_build_specs(self, test_image: str): ), ] - docker = Docker() - try: - # Cleanup any existing test image + # Cleanup any existing test image (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) try: - await docker.images.delete("prompt-siren-test-mixed:latest", force=True) + await local_client._docker.images.delete( + "prompt-siren-test-mixed:latest", force=True + ) except Exception: pass - async with manager.setup_batch(task_setups): - # Create containers from both images - async with manager.setup_task(task_setups[0]) as pulled_state: - result1 = await manager.exec( - pulled_state.agent_container_id, - ["echo", "pulled"], - ) - assert result1.exit_code == 0 - - async with manager.setup_task(task_setups[1]) as built_state: - result2 = await manager.exec( - built_state.agent_container_id, - ["cat", "/test-marker.txt"], - ) - assert result2.exit_code == 0 - assert result2.stdout is not None - assert "Test build successful" in result2.stdout - finally: - # Cleanup + async with manager.setup_batch(task_setups): + # Create containers from both images + async with manager.setup_task(task_setups[0]) as pulled_state: + result1 = await manager.exec( + pulled_state.agent_container_id, + ["echo", "pulled"], + ) + assert result1.exit_code == 0 + + async with 
manager.setup_task(task_setups[1]) as built_state: + result2 = await manager.exec( + built_state.agent_container_id, + ["cat", "/test-marker.txt"], + ) + assert result2.exit_code == 0 + assert result2.stdout is not None + assert "Test build successful" in result2.stdout + + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) try: - await docker.images.delete("prompt-siren-test-mixed:latest", force=True) + await local_client._docker.images.delete( + "prompt-siren-test-mixed:latest", force=True + ) except Exception: pass - await docker.close() # ==================== Multi-Stage Build Tests ==================== @@ -803,9 +938,18 @@ async def test_mixed_pull_and_build_specs(self, test_image: str): class TestMultiStageBuild: """Tests for multi-stage Docker builds with caching.""" - async def test_multi_stage_build_creates_all_stages(self): + async def test_multi_stage_build_creates_all_stages( + self, + multistage_test_images: list[str], + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test that multi-stage build creates all three stages correctly.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=multistage_test_images + ) manager = DockerSandboxManager(config) base_tag = "test-multistage-base:latest" @@ -815,18 +959,18 @@ async def test_multi_stage_build_creates_all_stages(self): stages = [ BuildStage( tag=base_tag, - context_path="tests/integration/fixtures/multistage/base", + context_path=str(FIXTURES_DIR / "multistage" / "base"), cache_key=base_tag, ), BuildStage( tag=env_tag, - context_path="tests/integration/fixtures/multistage/env", + context_path=str(FIXTURES_DIR / "multistage" / "env"), parent_tag=base_tag, cache_key=env_tag, ), BuildStage( 
tag=instance_tag, - context_path="tests/integration/fixtures/multistage/instance", + context_path=str(FIXTURES_DIR / "multistage" / "instance"), parent_tag=env_tag, ), ] @@ -842,57 +986,75 @@ async def test_multi_stage_build_creates_all_stages(self): network_config=None, ) - docker = Docker() - try: - # Cleanup any existing images + # Cleanup any existing images (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) for tag in [base_tag, env_tag, instance_tag]: try: - await docker.images.delete(tag, force=True) + await local_client._docker.images.delete(tag, force=True) except Exception: # noqa: PERF203 pass - async with manager.setup_batch([task_setup]): - # Verify all three images were created - images = await docker.images.list() + async with manager.setup_batch([task_setup]): + # Verify all three images were created (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] assert any(base_tag in (tag or "") for tag in image_tags) assert any(env_tag in (tag or "") for tag in image_tags) assert any(instance_tag in (tag or "") for tag in image_tags) - # Create container and verify all stages executed - async with manager.setup_task(task_setup) as sandbox_state: - container_id = sandbox_state.agent_container_id + # Create container and verify all stages executed + async with manager.setup_task(task_setup) as sandbox_state: + container_id = sandbox_state.agent_container_id - # Verify base stage - result = await manager.exec(container_id, ["cat", "/base-marker.txt"]) - assert result.exit_code == 0 - assert result.stdout is not None - assert "Base stage built" in result.stdout + # Verify base stage + result = await 
manager.exec(container_id, ["cat", "/base-marker.txt"]) + assert result.exit_code == 0 + assert result.stdout is not None + assert "Base stage built" in result.stdout - # Verify env stage - result = await manager.exec(container_id, ["cat", "/env-marker.txt"]) - assert result.exit_code == 0 - assert result.stdout is not None - assert "Environment ready" in result.stdout + # Verify env stage + result = await manager.exec(container_id, ["cat", "/env-marker.txt"]) + assert result.exit_code == 0 + assert result.stdout is not None + assert "Environment ready" in result.stdout - # Verify instance stage - result = await manager.exec(container_id, ["cat", "/instance-marker.txt"]) - assert result.exit_code == 0 - assert result.stdout is not None - assert "Instance ready" in result.stdout - finally: - # Cleanup + # Verify instance stage + result = await manager.exec(container_id, ["cat", "/instance-marker.txt"]) + assert result.exit_code == 0 + assert result.stdout is not None + assert "Instance ready" in result.stdout + + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) for tag in [base_tag, env_tag, instance_tag]: try: - await docker.images.delete(tag, force=True) + await local_client._docker.images.delete(tag, force=True) except Exception: # noqa: PERF203 pass - await docker.close() - async def test_multi_stage_build_caching(self): + async def test_multi_stage_build_caching( + self, + multistage_test_images: list[str], + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test that multi-stage build properly caches intermediate stages.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=multistage_test_images + ) manager = DockerSandboxManager(config) base_tag = 
"test-multistage-cache-base:latest" @@ -902,18 +1064,18 @@ async def test_multi_stage_build_caching(self): stages = [ BuildStage( tag=base_tag, - context_path="tests/integration/fixtures/multistage/base", + context_path=str(FIXTURES_DIR / "multistage" / "base"), cache_key=base_tag, ), BuildStage( tag=env_tag, - context_path="tests/integration/fixtures/multistage/env", + context_path=str(FIXTURES_DIR / "multistage" / "env"), parent_tag=base_tag, cache_key=env_tag, ), BuildStage( tag=instance_tag, - context_path="tests/integration/fixtures/multistage/instance", + context_path=str(FIXTURES_DIR / "multistage" / "instance"), parent_tag=env_tag, ), ] @@ -923,72 +1085,99 @@ async def test_multi_stage_build_caching(self): container_spec = ContainerSpec(image_spec=multi_stage_spec) agent_container = ContainerSetup(name="agent", spec=container_spec) - docker = Docker() - try: - # Cleanup any existing images + # Cleanup any existing images (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) for tag in [base_tag, env_tag, instance_tag]: try: - await docker.images.delete(tag, force=True) + await local_client._docker.images.delete(tag, force=True) except Exception: # noqa: PERF203 pass - # First build - all stages should be built - task_setup1 = TaskSetup( - task_id="cache-test-1", - agent_container=agent_container, - service_containers={}, - network_config=None, - ) + # First build - all stages should be built + task_setup1 = TaskSetup( + task_id="cache-test-1", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) - async with manager.setup_batch([task_setup1]): - images = await docker.images.list() + async with manager.setup_batch([task_setup1]): + # Verify all images created (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, 
LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] assert any(base_tag in (tag or "") for tag in image_tags) assert any(env_tag in (tag or "") for tag in image_tags) assert any(instance_tag in (tag or "") for tag in image_tags) - # Delete only instance image - await docker.images.delete(instance_tag, force=True) + # Delete only instance image (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + await local_client._docker.images.delete(instance_tag, force=True) - # Second build - base and env should be cached - task_setup2 = TaskSetup( - task_id="cache-test-2", - agent_container=agent_container, - service_containers={}, - network_config=None, - ) + # Second build - base and env should be cached + task_setup2 = TaskSetup( + task_id="cache-test-2", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) - async with manager.setup_batch([task_setup2]): - # Verify all images exist again - images = await docker.images.list() + async with manager.setup_batch([task_setup2]): + # Verify all images exist again (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] assert any(base_tag in (tag or "") for tag in image_tags) assert any(env_tag in (tag or "") for tag in image_tags) assert any(instance_tag in (tag or "") for tag in image_tags) - # Verify functionality - async with manager.setup_task(task_setup2) as sandbox_state: - result = await manager.exec( - sandbox_state.agent_container_id, - ["cat", "/instance-marker.txt"], - ) - assert result.exit_code == 0 - assert result.stdout is not None - 
assert "Instance ready" in result.stdout - finally: - # Cleanup + # Verify functionality + async with manager.setup_task(task_setup2) as sandbox_state: + result = await manager.exec( + sandbox_state.agent_container_id, + ["cat", "/instance-marker.txt"], + ) + assert result.exit_code == 0 + assert result.stdout is not None + assert "Instance ready" in result.stdout + + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) for tag in [base_tag, env_tag, instance_tag]: try: - await docker.images.delete(tag, force=True) + await local_client._docker.images.delete(tag, force=True) except Exception: # noqa: PERF203 pass - await docker.close() - async def test_shared_base_and_env_stages(self): + async def test_shared_base_and_env_stages( + self, + multistage_test_images: list[str], + docker_client_type: str, + skip_if_des_unavailable, + docker_client: AbstractDockerClient | None, + create_manager_config, + ): """Test multiple instances sharing base and env stages.""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=multistage_test_images + ) manager = DockerSandboxManager(config) base_tag = "test-shared-base:latest" @@ -1001,18 +1190,18 @@ async def test_shared_base_and_env_stages(self): stages=[ BuildStage( tag=base_tag, - context_path="tests/integration/fixtures/multistage/base", + context_path=str(FIXTURES_DIR / "multistage" / "base"), cache_key=base_tag, ), BuildStage( tag=env_tag, - context_path="tests/integration/fixtures/multistage/env", + context_path=str(FIXTURES_DIR / "multistage" / "env"), parent_tag=base_tag, cache_key=env_tag, ), BuildStage( tag=instance1_tag, - context_path="tests/integration/fixtures/multistage/instance", + context_path=str(FIXTURES_DIR / "multistage" / "instance"), parent_tag=env_tag, ), ], @@ -1023,18 +1212,18 @@ async 
def test_shared_base_and_env_stages(self): stages=[ BuildStage( tag=base_tag, - context_path="tests/integration/fixtures/multistage/base", + context_path=str(FIXTURES_DIR / "multistage" / "base"), cache_key=base_tag, ), BuildStage( tag=env_tag, - context_path="tests/integration/fixtures/multistage/env", + context_path=str(FIXTURES_DIR / "multistage" / "env"), parent_tag=base_tag, cache_key=env_tag, ), BuildStage( tag=instance2_tag, - context_path="tests/integration/fixtures/multistage/instance", + context_path=str(FIXTURES_DIR / "multistage" / "instance"), parent_tag=env_tag, ), ], @@ -1062,18 +1251,24 @@ async def test_shared_base_and_env_stages(self): ), ] - docker = Docker() - try: - # Cleanup + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) for tag in [base_tag, env_tag, instance1_tag, instance2_tag]: try: - await docker.images.delete(tag, force=True) + await local_client._docker.images.delete(tag, force=True) except Exception: # noqa: PERF203 pass - async with manager.setup_batch(task_setups): - # Verify all images created - images = await docker.images.list() + async with manager.setup_batch(task_setups): + # Verify all images created (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] # Base and env should exist (built once) @@ -1084,25 +1279,339 @@ async def test_shared_base_and_env_stages(self): assert any(instance1_tag in (tag or "") for tag in image_tags) assert any(instance2_tag in (tag or "") for tag in image_tags) - # Verify both work - async with manager.setup_task(task_setups[0]) as state1: - result = await manager.exec( - state1.agent_container_id, - ["cat", 
"/base-marker.txt"], - ) - assert result.exit_code == 0 + # Verify both work + async with manager.setup_task(task_setups[0]) as state1: + result = await manager.exec( + state1.agent_container_id, + ["cat", "/base-marker.txt"], + ) + assert result.exit_code == 0 - async with manager.setup_task(task_setups[1]) as state2: - result = await manager.exec( - state2.agent_container_id, - ["cat", "/env-marker.txt"], - ) - assert result.exit_code == 0 - finally: - # Cleanup + async with manager.setup_task(task_setups[1]) as state2: + result = await manager.exec( + state2.agent_container_id, + ["cat", "/env-marker.txt"], + ) + assert result.exit_code == 0 + + # Cleanup (only for local) + if docker_client_type == "local": + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) for tag in [base_tag, env_tag, instance1_tag, instance2_tag]: try: - await docker.images.delete(tag, force=True) + await local_client._docker.images.delete(tag, force=True) except Exception: # noqa: PERF203 pass - await docker.close() + + +# ==================== Stdin Handling Tests ==================== + + +@pytest.mark.docker_integration +class TestStdinHandling: + """Integration tests for stdin handling in Docker exec operations.""" + + async def test_exec_with_stdin_string( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with stdin as string.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-string-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with 
manager.setup_task(task_setup) as state: + # Execute cat command with stdin - should echo back the input + result = await manager.exec( + state.agent_container_id, + ["cat"], + stdin="Hello, World!\nThis is a test.", + ) + + assert result.exit_code == 0 + assert result.stdout is not None + assert "Hello, World!" in result.stdout + assert "This is a test." in result.stdout + + async def test_exec_with_stdin_bytes( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with stdin as bytes.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-bytes-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as state: + # Execute cat command with bytes stdin + test_data = b"Binary test data\nWith multiple lines" + result = await manager.exec( + state.agent_container_id, + ["cat"], + stdin=test_data, + ) + + assert result.exit_code == 0 + assert result.stdout is not None + assert "Binary test data" in result.stdout + assert "With multiple lines" in result.stdout + + async def test_exec_with_stdin_special_characters( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with stdin containing special characters.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", 
spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-special-chars-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as state: + # Test with special characters including single quotes, double quotes, backticks + special_input = ( + "Line with 'single quotes'\nLine with \"double quotes\"\nLine with `backticks`" + ) + result = await manager.exec( + state.agent_container_id, + ["cat"], + stdin=special_input, + ) + + assert result.exit_code == 0 + assert result.stdout is not None + # Verify all special characters are preserved + assert "'single quotes'" in result.stdout or "single quotes" in result.stdout + assert '"double quotes"' in result.stdout or "double quotes" in result.stdout + assert "`backticks`" in result.stdout or "backticks" in result.stdout + + async def test_exec_with_stdin_multiline( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with multiline stdin.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-multiline-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as state: + # Test with multiline input + multiline_input = "line 1\nline 2\nline 3\nline 4\nline 5" + result = await manager.exec( + state.agent_container_id, + ["cat"], + stdin=multiline_input, + ) + + assert result.exit_code == 0 + assert result.stdout is not None + assert "line 1" in result.stdout + assert "line 2" in result.stdout + 
assert "line 3" in result.stdout + assert "line 4" in result.stdout + assert "line 5" in result.stdout + + async def test_exec_with_stdin_piped_to_command( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test stdin being properly piped to a command that processes it.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-piped-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as state: + # Use a command that processes stdin (word count) + # Note: wc -l counts newline characters, so input must end with \n + test_input = "line one\nline two\nline three\n" + result = await manager.exec( + state.agent_container_id, + ["wc", "-l"], + stdin=test_input, + ) + + assert result.exit_code == 0 + assert result.stdout is not None + # Should count 3 lines (3 newline characters) + assert "3" in result.stdout + + async def test_exec_with_stdin_empty_string( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with empty stdin.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-empty-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + 
async with manager.setup_task(task_setup) as state: + # Execute cat command with empty stdin + result = await manager.exec( + state.agent_container_id, + ["cat"], + stdin="", + ) + + assert result.exit_code == 0 + # Empty stdin should result in empty output + assert result.stdout is None + + async def test_exec_with_stdin_large_input( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with large stdin input.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-large-test", + agent_container=agent_container, + service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as state: + # Create a large input (10KB) + large_input = "A" * 10000 + "\n" + result = await manager.exec( + state.agent_container_id, + ["wc", "-c"], + stdin=large_input, + ) + + assert result.exit_code == 0 + assert result.stdout is not None + # Should count ~10001 bytes (10000 A's + 1 newline) + assert "10001" in result.stdout + + async def test_exec_with_stdin_and_environment( + self, + test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, + ): + """Test executing a command with both stdin and environment variables.""" + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) + manager = DockerSandboxManager(config) + + container_spec = ContainerSpec(image_spec=PullImageSpec(tag=test_image)) + agent_container = ContainerSetup(name="agent", spec=container_spec) + task_setup = TaskSetup( + task_id="stdin-env-test", + agent_container=agent_container, + 
service_containers={}, + network_config=None, + ) + + async with manager.setup_batch([task_setup]): + async with manager.setup_task(task_setup) as state: + # Execute a script that uses both stdin and env var + stdin_data = "input from stdin" + result = await manager.exec( + state.agent_container_id, + ["sh", "-c", "echo ENV: $TEST_VAR && cat"], + stdin=stdin_data, + env={"TEST_VAR": "test_value"}, + ) + + assert result.exit_code == 0 + assert result.stdout is not None + # Should see both the env var and stdin content + assert "ENV: test_value" in result.stdout + assert "input from stdin" in result.stdout diff --git a/tests/integration/test_swebench_tools_integration.py b/tests/integration/test_swebench_tools_integration.py index 39a4ace..d4a639d 100644 --- a/tests/integration/test_swebench_tools_integration.py +++ b/tests/integration/test_swebench_tools_integration.py @@ -24,10 +24,7 @@ ) from prompt_siren.environments.bash_env import BashEnvState from prompt_siren.sandbox_managers.abstract import AbstractSandboxManager -from prompt_siren.sandbox_managers.docker.manager import ( - DockerSandboxConfig, - DockerSandboxManager, -) +from prompt_siren.sandbox_managers.docker.manager import DockerSandboxManager from prompt_siren.sandbox_managers.image_spec import PullImageSpec from prompt_siren.sandbox_managers.sandbox_state import SandboxState from prompt_siren.sandbox_managers.sandbox_task_setup import ( @@ -45,12 +42,17 @@ @pytest.fixture(scope="module") async def sandbox_manager( test_image: str, + docker_client_type: str, + skip_if_des_unavailable, + create_manager_config, ) -> AsyncIterator[tuple[AbstractSandboxManager, str, TaskSetup]]: """Create a sandbox manager for tool testing. Returns tuple of (sandbox_manager, test_image, task_setup) for use by dependent fixtures. 
""" - config = DockerSandboxConfig(network_enabled=False) + config = create_manager_config( + docker_client_type, network_enabled=False, test_images=test_image + ) manager = DockerSandboxManager(config) # Create TaskSetup for API diff --git a/tests/sandbox_managers/docker/plugins/__init__.py b/tests/sandbox_managers/docker/plugins/__init__.py new file mode 100644 index 0000000..ca9c96d --- /dev/null +++ b/tests/sandbox_managers/docker/plugins/__init__.py @@ -0,0 +1 @@ +"""Docker client plugin tests.""" diff --git a/tests/sandbox_managers/docker/plugins/test_local_client.py b/tests/sandbox_managers/docker/plugins/test_local_client.py new file mode 100644 index 0000000..a645830 --- /dev/null +++ b/tests/sandbox_managers/docker/plugins/test_local_client.py @@ -0,0 +1,485 @@ +"""Unit tests for LocalDockerClient.""" + +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from prompt_siren.sandbox_managers.abstract import ExecOutput +from prompt_siren.sandbox_managers.docker.local_client import ( + LocalContainer, + LocalDockerClient, + LocalNetwork, +) +from prompt_siren.sandbox_managers.docker.manager import ( + create_docker_client_from_config, +) +from prompt_siren.sandbox_managers.docker.plugins import ( + get_registered_docker_clients, +) + +pytestmark = pytest.mark.anyio + + +@pytest.fixture +def mock_docker(): + """Create a mock aiodocker.Docker instance.""" + docker = MagicMock() + docker.close = AsyncMock() + docker.images = MagicMock() + docker.images.inspect = AsyncMock() + docker.images.pull = AsyncMock() + docker.images.build = AsyncMock() + docker.images.delete = AsyncMock() + docker.containers = MagicMock() + docker.containers.create = AsyncMock() + docker.containers.get = AsyncMock() + docker.networks = MagicMock() + docker.networks.create = AsyncMock() + docker.networks.get = AsyncMock() + return docker + + +@pytest.fixture +def client(mock_docker): + """Create a LocalDockerClient 
with mocked aiodocker.""" + with patch("prompt_siren.sandbox_managers.docker.local_client.aiodocker.Docker") as mock_cls: + mock_cls.return_value = mock_docker + client = LocalDockerClient() + client._docker = mock_docker + return client + + +class TestLocalDockerClient: + """Tests for LocalDockerClient.""" + + async def test_close(self, client, mock_docker): + """Test closing the client.""" + await client.close() + mock_docker.close.assert_awaited_once() + + async def test_inspect_image(self, client, mock_docker): + """Test inspecting an image.""" + mock_docker.images.inspect.return_value = {"Id": "sha256:abc123"} + + result = await client.inspect_image("python:3.12") + + mock_docker.images.inspect.assert_awaited_once_with("python:3.12") + assert result == {"Id": "sha256:abc123"} + + async def test_pull_image(self, client, mock_docker): + """Test pulling an image.""" + await client.pull_image("python:3.12") + + mock_docker.images.pull.assert_awaited_once_with(from_image="python:3.12") + + async def test_build_image(self, client, mock_docker): + """Test building an image.""" + + # Mock build stream + async def mock_build(*args, **kwargs): + yield {"stream": "Step 1/2 : FROM python:3.12"} + yield {"stream": "Step 2/2 : RUN echo hello"} + + mock_docker.images.build = mock_build + + # Mock tarfile to avoid filesystem access + with patch( + "prompt_siren.sandbox_managers.docker.local_client.tarfile.open" + ) as mock_tar_open: + mock_tar = MagicMock() + mock_tar_open.return_value.__enter__.return_value = mock_tar + + logs = [ + log + async for log in client.build_image( + context_path="/tmp/test", + tag="test:latest", + dockerfile_path="Dockerfile", + buildargs={"ARG1": "value1"}, + ) + ] + + assert len(logs) == 2 + assert "Step 1/2" in logs[0]["stream"] + assert "Step 2/2" in logs[1]["stream"] + # Verify tar.add was called with the context path + mock_tar.add.assert_called_once() + + async def test_delete_image(self, client, mock_docker): + """Test deleting an 
image.""" + await client.delete_image("test:latest", force=True) + + mock_docker.images.delete.assert_awaited_once_with("test:latest", force=True) + + async def test_create_container(self, client, mock_docker): + """Test creating a container.""" + mock_container = MagicMock() + mock_docker.containers.create.return_value = mock_container + + config = {"Image": "python:3.12"} + container = await client.create_container(config, name="test-container") + + mock_docker.containers.create.assert_awaited_once_with(config, name="test-container") + assert isinstance(container, LocalContainer) + + async def test_get_container(self, client, mock_docker): + """Test getting a container.""" + mock_container = MagicMock() + mock_docker.containers.get.return_value = mock_container + + container = await client.get_container("container123") + + mock_docker.containers.get.assert_awaited_once_with("container123") + assert isinstance(container, LocalContainer) + + async def test_create_network(self, client, mock_docker): + """Test creating a network.""" + mock_network = MagicMock() + mock_docker.networks.create.return_value = mock_network + + config = {"Name": "test-network", "Driver": "bridge"} + network = await client.create_network(config) + + mock_docker.networks.create.assert_awaited_once_with(config=config) + assert isinstance(network, LocalNetwork) + + async def test_get_network(self, client, mock_docker): + """Test getting a network.""" + mock_network = MagicMock() + mock_docker.networks.get.return_value = mock_network + + network = await client.get_network("network123") + + mock_docker.networks.get.assert_awaited_once_with("network123") + assert isinstance(network, LocalNetwork) + + +class TestLocalContainer: + """Tests for LocalContainer wrapper.""" + + async def test_start(self): + """Test starting a container.""" + mock_container = MagicMock() + mock_container.start = AsyncMock() + container = LocalContainer(mock_container) + + await container.start() + + 
mock_container.start.assert_awaited_once() + + async def test_stop(self): + """Test stopping a container.""" + mock_container = MagicMock() + mock_container.stop = AsyncMock() + container = LocalContainer(mock_container) + + await container.stop() + + mock_container.stop.assert_awaited_once() + + async def test_delete(self): + """Test deleting a container.""" + mock_container = MagicMock() + mock_container.delete = AsyncMock() + container = LocalContainer(mock_container) + + await container.delete() + + mock_container.delete.assert_awaited_once() + + async def test_show(self): + """Test showing container details.""" + mock_container = MagicMock() + mock_container.show = AsyncMock(return_value={"Id": "abc123", "State": {"Running": True}}) + container = LocalContainer(mock_container) + + info = await container.show() + + assert info["Id"] == "abc123" + assert info["State"]["Running"] is True + + async def test_exec(self): + """Test executing a command.""" + # Mock the exec instance and stream + mock_msg = MagicMock() + mock_msg.stream = 1 + mock_msg.data = b"hello\n" + + mock_stream = MagicMock() + mock_stream.read_out = AsyncMock(side_effect=[mock_msg, None]) + mock_stream.write_in = AsyncMock() + mock_stream._resp = MagicMock() + mock_stream._resp.connection = MagicMock() + mock_stream._resp.connection.transport = None + + mock_exec = MagicMock() + mock_exec.start = MagicMock(return_value=mock_stream) + mock_exec.inspect = AsyncMock(return_value={"ExitCode": 0}) + + mock_container = MagicMock() + mock_container.exec = AsyncMock(return_value=mock_exec) + container = LocalContainer(mock_container) + + result = await container.exec( + cmd=["bash", "-c", "echo hello"], + stdin=None, + user="root", + environment={"VAR": "value"}, + workdir="/app", + timeout=30, + ) + + assert isinstance(result, ExecOutput) + assert result.exit_code == 0 + assert result.stdout == "hello\n" + mock_container.exec.assert_awaited_once() + + async def test_log(self): + """Test getting 
container logs.""" + mock_container = MagicMock() + mock_container.log = AsyncMock(return_value=["line1\n", "line2\n"]) + container = LocalContainer(mock_container) + + logs = await container.log(stdout=True, stderr=True) + + assert logs == ["line1\n", "line2\n"] + + async def test_commit(self): + """Test committing a container.""" + mock_container = MagicMock() + mock_container.commit = AsyncMock() + container = LocalContainer(mock_container) + + await container.commit(repository="test-image", tag="latest") + + mock_container.commit.assert_awaited_once_with(repository="test-image", tag="latest") + + +class TestLocalNetwork: + """Tests for LocalNetwork wrapper.""" + + async def test_show(self): + """Test showing network details.""" + mock_network = MagicMock() + mock_network.show = AsyncMock(return_value={"Id": "net123", "Driver": "bridge"}) + network = LocalNetwork(mock_network) + + info = await network.show() + + assert info["Id"] == "net123" + assert info["Driver"] == "bridge" + + async def test_delete(self): + """Test deleting a network.""" + mock_network = MagicMock() + mock_network.delete = AsyncMock() + network = LocalNetwork(mock_network) + + await network.delete() + + mock_network.delete.assert_awaited_once() + + +class TestLocalDockerClientRegistry: + """Tests for LocalDockerClient registry integration.""" + + def test_local_client_can_be_created_via_registry(self): + """Verify local client can be instantiated through the registry.""" + assert "local" in get_registered_docker_clients() + + # Mock aiodocker.Docker to avoid needing a running event loop + with patch("prompt_siren.sandbox_managers.docker.local_client.aiodocker.Docker"): + client = create_docker_client_from_config("local", {}) + assert isinstance(client, LocalDockerClient) + + +@pytest.mark.docker_integration +class TestLocalDockerClientIntegration: + """Integration tests for LocalDockerClient that use actual Docker daemon.""" + + async def test_client_lifecycle(self): + """Test creating and 
closing a client.""" + client = LocalDockerClient() + + try: + # Verify client is initialized + assert client._docker is not None + finally: + await client.close() + + async def test_container_lifecycle(self): + """Test creating, starting, stopping, and deleting a container.""" + client = LocalDockerClient() + + try: + # Create container + config = { + "Image": "debian:bookworm-slim", + "Cmd": ["sleep", "300"], + } + container = await client.create_container(config, name="test-local-container-lifecycle") + + try: + # Start container + await container.start() + + # Check container is running + info = await container.show() + assert info["State"]["Running"] is True + + # Stop container + await container.stop() + + # Verify stopped + info = await container.show() + assert info["State"]["Running"] is False + finally: + # Clean up container + await container.delete() + finally: + await client.close() + + async def test_exec_command(self): + """Test executing a command in a container.""" + client = LocalDockerClient() + + try: + # Create and start container + config = { + "Image": "debian:bookworm-slim", + "Cmd": ["sleep", "300"], + } + container = await client.create_container(config, name="test-local-exec-command") + + try: + await container.start() + + # Execute command + result = await container.exec( + cmd=["echo", "hello", "world"], + stdin=None, + user="root", + environment=None, + workdir=None, + timeout=30, + ) + + # Verify output contains "hello world" + assert result.stdout is not None + assert "hello world" in result.stdout + assert result.exit_code == 0 + finally: + await container.delete() + finally: + await client.close() + + async def test_network_lifecycle(self): + """Test creating and deleting a network.""" + client = LocalDockerClient() + + try: + # Create network + config = { + "Name": "test-local-network-lifecycle", + "Driver": "bridge", + "Internal": False, + } + network = await client.create_network(config) + + try: + # Verify network exists + info 
= await network.show() + assert info["Name"] == "test-local-network-lifecycle" + assert info["Driver"] == "bridge" + finally: + # Clean up network + await network.delete() + finally: + await client.close() + + async def test_image_operations(self): + """Test pulling and inspecting images.""" + client = LocalDockerClient() + + try: + # Pull a small test image + await client.pull_image("alpine:latest") + + # Inspect the image + image_info = await client.inspect_image("alpine:latest") + assert image_info["Id"] is not None + assert "alpine" in image_info["RepoTags"][0].lower() + finally: + await client.close() + + async def test_container_logs(self): + """Test getting container logs.""" + client = LocalDockerClient() + + try: + # Create and start container with a command that produces output + config = { + "Image": "debian:bookworm-slim", + "Cmd": ["sh", "-c", "'echo \"test log output\" && sleep 300'"], + } + container = await client.create_container(config, name="test-local-container-logs") + + try: + await container.start() + + # Wait a bit for the command to execute + await asyncio.sleep(2) + + # Get logs + logs = await container.log(stdout=True, stderr=True) + + # Verify logs contain expected output + log_text = "\n".join(logs) + assert "test log output" in log_text + finally: + await container.delete() + finally: + await client.close() + + async def test_container_commit(self): + """Test committing a container to an image.""" + client = LocalDockerClient() + + try: + # Create and start container + config = { + "Image": "debian:bookworm-slim", + "Cmd": ["sleep", "300"], + } + container = await client.create_container(config, name="test-local-container-commit") + + try: + await container.start() + + # Make a change in the container + await container.exec( + cmd=["sh", "-c", "echo 'test' > /tmp/testfile"], + stdin=None, + user="root", + environment=None, + workdir=None, + timeout=30, + ) + + # Commit container to new image + await 
container.commit(repository="test-local-committed-image", tag="v1") + + # Verify image exists + image_info = await client.inspect_image("test-local-committed-image:v1") + assert image_info["Id"] is not None + finally: + # Clean up + await container.delete() + try: + await client.delete_image("test-local-committed-image:v1", force=True) + except Exception: + pass # Ignore cleanup errors + finally: + await client.close() diff --git a/tests/sandbox_managers/docker/test_manager.py b/tests/sandbox_managers/docker/test_manager.py index 7848c49..158fc6e 100644 --- a/tests/sandbox_managers/docker/test_manager.py +++ b/tests/sandbox_managers/docker/test_manager.py @@ -9,10 +9,9 @@ from __future__ import annotations import asyncio -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, patch import pytest -from aiodocker import Docker from prompt_siren.sandbox_managers.docker.manager import ( DockerSandboxConfig, DockerSandboxManager, @@ -51,29 +50,26 @@ def task_setup() -> TaskSetup: class TestDockerSandboxManagerCleanup: """Tests for cleanup orchestration.""" - @patch("prompt_siren.sandbox_managers.docker.manager.aiodocker.Docker") + @patch("prompt_siren.sandbox_managers.docker.manager.create_docker_client_from_config") async def test_setup_batch_cleans_up_contexts( - self, mock_docker_cls: Docker, manager: DockerSandboxManager, task_setup: TaskSetup + self, mock_get_client, manager: DockerSandboxManager, task_setup: TaskSetup ): """Test that setup_batch cleans up all task contexts on exit.""" mock_docker = AsyncMock() mock_docker.close = AsyncMock() - mock_docker.images = MagicMock() - mock_docker.images.inspect = AsyncMock() - mock_docker.containers = MagicMock() - mock_docker.containers.create = AsyncMock() - mock_docker.containers.get = AsyncMock() - mock_docker_cls.return_value = mock_docker # type: ignore -- mock class - - # Create mock container - mock_container = MagicMock() + mock_docker.inspect_image = AsyncMock() + + # Create 
mock container (AbstractContainer interface) + mock_container = AsyncMock() mock_container.show = AsyncMock( return_value={"Id": "test-container-id", "State": {"Running": True}} ) mock_container.start = AsyncMock() mock_container.stop = AsyncMock() mock_container.delete = AsyncMock() - mock_docker.containers.create.return_value = mock_container + + mock_docker.create_container = AsyncMock(return_value=mock_container) + mock_get_client.return_value = mock_docker async with manager.setup_batch([task_setup]): # Create a task (which creates a context) @@ -119,16 +115,15 @@ async def test_exec_outside_batch_raises_error(self, manager: DockerSandboxManag class TestDockerSandboxManagerContextLookup: """Tests for context lookup logic.""" - @patch("prompt_siren.sandbox_managers.docker.manager.aiodocker.Docker") + @patch("prompt_siren.sandbox_managers.docker.manager.create_docker_client_from_config") async def test_clone_invalid_execution_id_raises_error( - self, mock_docker_cls: Docker, manager: DockerSandboxManager, task_setup: TaskSetup + self, mock_get_client, manager: DockerSandboxManager, task_setup: TaskSetup ): """Test that cloning with invalid execution_id raises ValueError.""" mock_docker = AsyncMock() mock_docker.close = AsyncMock() - mock_docker.images = MagicMock() - mock_docker.images.inspect = AsyncMock() - mock_docker_cls.return_value = mock_docker # type: ignore -- mock class + mock_docker.inspect_image = AsyncMock() + mock_get_client.return_value = mock_docker mock_state = SandboxState( agent_container_id="test-id", @@ -145,24 +140,22 @@ async def test_clone_invalid_execution_id_raises_error( class TestDockerSandboxManagerConcurrency: """Tests for concurrent task execution.""" - @patch("prompt_siren.sandbox_managers.docker.manager.aiodocker.Docker") + @patch("prompt_siren.sandbox_managers.docker.manager.create_docker_client_from_config") async def test_parallel_tasks_with_same_task_id( - self, mock_docker_cls: Docker, manager: DockerSandboxManager, 
task_setup: TaskSetup + self, mock_get_client, manager: DockerSandboxManager, task_setup: TaskSetup ): """Test that parallel tasks with same task_id get unique execution_ids.""" mock_docker = AsyncMock() mock_docker.close = AsyncMock() - mock_docker.images = MagicMock() - mock_docker.images.inspect = AsyncMock() - mock_docker.containers = MagicMock() + mock_docker.inspect_image = AsyncMock() # Track execution IDs execution_ids = [] - # Create multiple mock containers + # Create multiple mock containers (AbstractContainer interface) containers = [] for i in range(3): - mock_container = MagicMock() + mock_container = AsyncMock() mock_container.show = AsyncMock( return_value={"Id": f"container-{i}", "State": {"Running": True}} ) @@ -171,8 +164,8 @@ async def test_parallel_tasks_with_same_task_id( mock_container.delete = AsyncMock() containers.append(mock_container) - mock_docker.containers.create = AsyncMock(side_effect=containers) - mock_docker_cls.return_value = mock_docker # type: ignore -- mock class + mock_docker.create_container = AsyncMock(side_effect=containers) + mock_get_client.return_value = mock_docker async def run_task(): async with manager.setup_task(task_setup) as sandbox_state: @@ -193,25 +186,23 @@ async def run_task(): class TestDockerSandboxManagerCloning: """Tests for container cloning functionality.""" - @patch("prompt_siren.sandbox_managers.docker.manager.aiodocker.Docker") + @patch("prompt_siren.sandbox_managers.docker.manager.create_docker_client_from_config") async def test_clone_preserves_custom_command( - self, mock_docker_cls: Docker, manager: DockerSandboxManager, task_setup: TaskSetup + self, mock_get_client, manager: DockerSandboxManager, task_setup: TaskSetup ): """Test that cloning a container preserves custom command.""" - # Setup mock Docker client + # Setup mock Docker client (AbstractDockerClient interface) mock_docker = AsyncMock() mock_docker.close = AsyncMock() - mock_docker.images = MagicMock() - mock_docker.images.inspect = 
AsyncMock() - mock_docker.containers = MagicMock() - mock_docker.networks = MagicMock() - mock_docker_cls.return_value = mock_docker # type: ignore -- mock class + mock_docker.inspect_image = AsyncMock() + mock_docker.delete_image = AsyncMock() + mock_get_client.return_value = mock_docker # Custom command to test custom_command = ["/bin/bash", "-c", "while true; do echo 'test'; sleep 1; done"] - # Mock source container with custom command - source_container = MagicMock() + # Mock source container (AbstractContainer interface) + source_container = AsyncMock() source_container.show = AsyncMock( return_value={ "Id": "source-container-id", @@ -229,8 +220,8 @@ async def test_clone_preserves_custom_command( source_container.delete = AsyncMock() source_container.commit = AsyncMock() - # Mock cloned container - cloned_container = MagicMock() + # Mock cloned container (AbstractContainer interface) + cloned_container = AsyncMock() cloned_container.show = AsyncMock( return_value={ "Id": "cloned-container-id", @@ -248,8 +239,8 @@ async def test_clone_preserves_custom_command( cloned_container.delete = AsyncMock() # Setup container creation sequence - mock_docker.containers.create = AsyncMock(side_effect=[source_container, cloned_container]) - mock_docker.containers.get = AsyncMock(return_value=source_container) + mock_docker.create_container = AsyncMock(side_effect=[source_container, cloned_container]) + mock_docker.get_container = AsyncMock(return_value=source_container) async with manager.setup_batch([task_setup]): async with manager.setup_task(task_setup) as source_state: @@ -266,7 +257,7 @@ async def test_clone_preserves_custom_command( assert "temp-clone-" in commit_call.kwargs["repository"] # Verify new container was created with custom command preserved - create_calls = mock_docker.containers.create.call_args_list + create_calls = mock_docker.create_container.call_args_list assert len(create_calls) == 2 # Source + clone clone_create_call = create_calls[1] # Second call 
is the clone From 0f003de31d1695cbddf49617ef6f3dfd01634acc Mon Sep 17 00:00:00 2001 From: Ivan Evtimov Date: Wed, 10 Dec 2025 07:05:46 -0500 Subject: [PATCH 2/5] remove local-only checks for docker client integration tests --- .../test_docker_manager_integration.py | 455 ++++++++---------- 1 file changed, 199 insertions(+), 256 deletions(-) diff --git a/tests/integration/test_docker_manager_integration.py b/tests/integration/test_docker_manager_integration.py index 2d5ed25..02a4fc2 100644 --- a/tests/integration/test_docker_manager_integration.py +++ b/tests/integration/test_docker_manager_integration.py @@ -10,6 +10,7 @@ import asyncio from collections.abc import AsyncIterator +from importlib import resources from pathlib import Path from uuid import uuid4 @@ -37,7 +38,7 @@ pytestmark = pytest.mark.anyio # Path to test fixtures -FIXTURES_DIR = Path(__file__).parent / "fixtures" +FIXTURES_DIR = resources.files("tests.integration.fixtures") # ==================== Shared Fixtures for Container Reuse ==================== @@ -47,7 +48,6 @@ async def basic_sandbox_manager( test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ) -> AsyncIterator[tuple[AbstractSandboxManager, TaskSetup]]: """Create a sandbox manager with batch context for basic tests. 
@@ -228,7 +228,6 @@ async def test_multi_container_dns_resolution_and_communication( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test that containers on the same network can resolve each other by hostname.""" @@ -303,7 +302,6 @@ async def test_network_disabled_creates_internal_network( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -333,11 +331,10 @@ async def test_network_disabled_creates_internal_network( assert sandbox_state.network_id is not None # Verify network is internal by inspecting it - if docker_client_type == "local": - assert docker_client is not None - network = await docker_client.get_network(sandbox_state.network_id) - network_info = await network.show() - assert network_info["Internal"] is True + assert docker_client is not None + network = await docker_client.get_network(sandbox_state.network_id) + network_info = await network.show() + assert network_info["Internal"] is True # ==================== Container Cloning Tests ==================== @@ -351,7 +348,6 @@ async def test_clone_single_container( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test cloning a single container creates snapshot.""" @@ -411,7 +407,6 @@ async def test_clone_container_with_custom_command( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -443,14 +438,12 @@ async def test_clone_container_with_custom_command( async with manager.setup_task(task_setup) as source_state: source_id = source_state.agent_container_id - # Only inspect Docker API for local docker - if docker_client_type == "local": - assert docker_client is not None - # Verify source container's command via Docker API - source_container = await docker_client.get_container(source_id) - source_info = 
await source_container.show() - source_cmd = source_info["Config"]["Cmd"] - assert source_cmd == custom_command + # Verify source container's command via Docker API + assert docker_client is not None + source_container = await docker_client.get_container(source_id) + source_info = await source_container.show() + source_cmd = source_info["Config"]["Cmd"] + assert source_cmd == custom_command # Verify command is actually running in source container # Use /proc filesystem which is available in all Linux containers @@ -470,20 +463,18 @@ async def test_clone_container_with_custom_command( # Verify clone has different container ID assert cloned_id != source_id - # Only inspect Docker API for local docker - if docker_client_type == "local": - assert docker_client is not None - # Verify cloned container's command via Docker API - cloned_container = await docker_client.get_container(cloned_id) - cloned_info = await cloned_container.show() - cloned_cmd = cloned_info["Config"]["Cmd"] - assert cloned_cmd == custom_command - - # Re-fetch source_info to verify both containers are running - source_container = await docker_client.get_container(source_id) - source_info_check = await source_container.show() - assert source_info_check["State"]["Running"] is True - assert cloned_info["State"]["Running"] is True + # Verify cloned container's command via Docker API + assert docker_client is not None + cloned_container = await docker_client.get_container(cloned_id) + cloned_info = await cloned_container.show() + cloned_cmd = cloned_info["Config"]["Cmd"] + assert cloned_cmd == custom_command + + # Re-fetch source_info to verify both containers are running + source_container = await docker_client.get_container(source_id) + source_info_check = await source_container.show() + assert source_info_check["State"]["Running"] is True + assert cloned_info["State"]["Running"] is True # Verify command is actually running in cloned container cloned_cmdline_result = await manager.exec( @@ -498,7 +489,6 
@@ async def test_clone_multi_container_with_network( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -539,19 +529,17 @@ async def test_clone_multi_container_with_network( assert cloned_state.network_id is not None # Verify cloned containers can communicate on new network - # (Use Docker API to verify network attachment - only for local) - if docker_client_type == "local": - assert docker_client is not None - clone_agent = await docker_client.get_container(cloned_state.agent_container_id) - clone_info = await clone_agent.show() - networks = clone_info["NetworkSettings"]["Networks"] - assert len(networks) > 0 + # (Use Docker API to verify network attachment) + assert docker_client is not None + clone_agent = await docker_client.get_container(cloned_state.agent_container_id) + clone_info = await clone_agent.show() + networks = clone_info["NetworkSettings"]["Networks"] + assert len(networks) > 0 async def test_clone_cleanup_removes_temp_images( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -578,31 +566,28 @@ async def test_clone_cleanup_removes_temp_images( cloned_state = await manager.clone_sandbox_state(source_state) cloned_states.append(cloned_state) - # Only verify temp images for local docker - if docker_client_type == "local": - assert docker_client is not None - # Verify temp images exist - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - images = await local_client._docker.images.list() - image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - temp_images_count = sum(1 for tag in image_tags if tag and "temp-clone-" in tag) - assert temp_images_count >= 3 - - # After task cleanup, temp images should be gone (only check for local) - if docker_client_type == "local": + # Verify temp images exist assert 
docker_client is not None local_client = docker_client assert isinstance(local_client, LocalDockerClient) images = await local_client._docker.images.list() image_tags = [tag for img in images for tag in img.get("RepoTags", [])] + temp_images_count = sum(1 for tag in image_tags if tag and "temp-clone-" in tag) + assert temp_images_count >= 3 + + # After task cleanup, temp images should be gone + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() + image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - # Filter for temp images from this specific execution - # (may have temp images from other tests) - for cloned_state in cloned_states: - # Verify this specific clone's temp image is gone - temp_image_pattern = f"temp-clone-{cloned_state.execution_id}" - assert not any(temp_image_pattern in (tag or "") for tag in image_tags) + # Filter for temp images from this specific execution + # (may have temp images from other tests) + for cloned_state in cloned_states: + # Verify this specific clone's temp image is gone + temp_image_pattern = f"temp-clone-{cloned_state.execution_id}" + assert not any(temp_image_pattern in (tag or "") for tag in image_tags) # ==================== Concurrent Execution Tests ==================== @@ -616,7 +601,6 @@ async def test_parallel_tasks_with_same_task_id( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test that parallel tasks with same task_id are independent.""" @@ -666,7 +650,6 @@ async def test_concurrent_cloning( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test that concurrent cloning operations are safe.""" @@ -717,7 +700,6 @@ async def test_build_image_from_dockerfile( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, 
create_manager_config, ): @@ -741,27 +723,23 @@ async def test_build_image_from_dockerfile( network_config=None, ) - # Cleanup any existing test image (only for local) - if docker_client_type == "local": + # Cleanup any existing test image + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + try: + await local_client._docker.images.delete("prompt-siren-test-build:latest", force=True) + except Exception: + pass + + async with manager.setup_batch([task_setup]): + # Verify image was built assert docker_client is not None local_client = docker_client assert isinstance(local_client, LocalDockerClient) - try: - await local_client._docker.images.delete( - "prompt-siren-test-build:latest", force=True - ) - except Exception: - pass - - async with manager.setup_batch([task_setup]): - # Verify image was built (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - images = await local_client._docker.images.list() - image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - assert any("prompt-siren-test-build:latest" in (tag or "") for tag in image_tags) + images = await local_client._docker.images.list() + image_tags = [tag for img in images for tag in img.get("RepoTags", [])] + assert any("prompt-siren-test-build:latest" in (tag or "") for tag in image_tags) # Create container and verify build marker async with manager.setup_task(task_setup) as sandbox_state: @@ -773,23 +751,19 @@ async def test_build_image_from_dockerfile( assert result.stdout is not None assert "Test build successful" in result.stdout - # Cleanup (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - try: - await local_client._docker.images.delete( - "prompt-siren-test-build:latest", 
force=True - ) - except Exception: - pass + # Cleanup + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + try: + await local_client._docker.images.delete("prompt-siren-test-build:latest", force=True) + except Exception: + pass async def test_build_with_build_args( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -815,17 +789,16 @@ async def test_build_with_build_args( network_config=None, ) - # Cleanup any existing test image (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - try: - await local_client._docker.images.delete( - "prompt-siren-test-build-args:latest", force=True - ) - except Exception: - pass + # Cleanup any existing test image + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + try: + await local_client._docker.images.delete( + "prompt-siren-test-build-args:latest", force=True + ) + except Exception: + pass async with manager.setup_batch([task_setup]): async with manager.setup_task(task_setup) as sandbox_state: @@ -838,23 +811,21 @@ async def test_build_with_build_args( assert result.stdout is not None assert "custom_value" in result.stdout - # Cleanup (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - try: - await local_client._docker.images.delete( - "prompt-siren-test-build-args:latest", force=True - ) - except Exception: - pass + # Cleanup + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + try: + await local_client._docker.images.delete( + "prompt-siren-test-build-args:latest", force=True + ) + except 
Exception: + pass async def test_mixed_pull_and_build_specs( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -888,17 +859,14 @@ async def test_mixed_pull_and_build_specs( ), ] - # Cleanup any existing test image (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - try: - await local_client._docker.images.delete( - "prompt-siren-test-mixed:latest", force=True - ) - except Exception: - pass + # Cleanup any existing test image + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + try: + await local_client._docker.images.delete("prompt-siren-test-mixed:latest", force=True) + except Exception: + pass async with manager.setup_batch(task_setups): # Create containers from both images @@ -918,17 +886,14 @@ async def test_mixed_pull_and_build_specs( assert result2.stdout is not None assert "Test build successful" in result2.stdout - # Cleanup (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - try: - await local_client._docker.images.delete( - "prompt-siren-test-mixed:latest", force=True - ) - except Exception: - pass + # Cleanup + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + try: + await local_client._docker.images.delete("prompt-siren-test-mixed:latest", force=True) + except Exception: + pass # ==================== Multi-Stage Build Tests ==================== @@ -942,7 +907,6 @@ async def test_multi_stage_build_creates_all_stages( self, multistage_test_images: list[str], docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ 
-986,29 +950,27 @@ async def test_multi_stage_build_creates_all_stages( network_config=None, ) - # Cleanup any existing images (only for local) - if docker_client_type == "local": + # Cleanup any existing images + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + for tag in [base_tag, env_tag, instance_tag]: + try: + await local_client._docker.images.delete(tag, force=True) + except Exception: # noqa: PERF203 + pass + + async with manager.setup_batch([task_setup]): + # Verify all three images were created assert docker_client is not None local_client = docker_client assert isinstance(local_client, LocalDockerClient) - for tag in [base_tag, env_tag, instance_tag]: - try: - await local_client._docker.images.delete(tag, force=True) - except Exception: # noqa: PERF203 - pass + images = await local_client._docker.images.list() + image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - async with manager.setup_batch([task_setup]): - # Verify all three images were created (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - images = await local_client._docker.images.list() - image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - - assert any(base_tag in (tag or "") for tag in image_tags) - assert any(env_tag in (tag or "") for tag in image_tags) - assert any(instance_tag in (tag or "") for tag in image_tags) + assert any(base_tag in (tag or "") for tag in image_tags) + assert any(env_tag in (tag or "") for tag in image_tags) + assert any(instance_tag in (tag or "") for tag in image_tags) # Create container and verify all stages executed async with manager.setup_task(task_setup) as sandbox_state: @@ -1032,22 +994,20 @@ async def test_multi_stage_build_creates_all_stages( assert result.stdout is not None assert "Instance ready" in result.stdout - # 
Cleanup (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - for tag in [base_tag, env_tag, instance_tag]: - try: - await local_client._docker.images.delete(tag, force=True) - except Exception: # noqa: PERF203 - pass + # Cleanup + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + for tag in [base_tag, env_tag, instance_tag]: + try: + await local_client._docker.images.delete(tag, force=True) + except Exception: # noqa: PERF203 + pass async def test_multi_stage_build_caching( self, multistage_test_images: list[str], docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -1085,16 +1045,15 @@ async def test_multi_stage_build_caching( container_spec = ContainerSpec(image_spec=multi_stage_spec) agent_container = ContainerSetup(name="agent", spec=container_spec) - # Cleanup any existing images (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - for tag in [base_tag, env_tag, instance_tag]: - try: - await local_client._docker.images.delete(tag, force=True) - except Exception: # noqa: PERF203 - pass + # Cleanup any existing images + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + for tag in [base_tag, env_tag, instance_tag]: + try: + await local_client._docker.images.delete(tag, force=True) + except Exception: # noqa: PERF203 + pass # First build - all stages should be built task_setup1 = TaskSetup( @@ -1105,24 +1064,22 @@ async def test_multi_stage_build_caching( ) async with manager.setup_batch([task_setup1]): - # Verify all images created (only for local) - if docker_client_type == "local": - assert docker_client is not None - 
local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - images = await local_client._docker.images.list() - image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - - assert any(base_tag in (tag or "") for tag in image_tags) - assert any(env_tag in (tag or "") for tag in image_tags) - assert any(instance_tag in (tag or "") for tag in image_tags) - - # Delete only instance image (only for local) - if docker_client_type == "local": + # Verify all images created assert docker_client is not None local_client = docker_client assert isinstance(local_client, LocalDockerClient) - await local_client._docker.images.delete(instance_tag, force=True) + images = await local_client._docker.images.list() + image_tags = [tag for img in images for tag in img.get("RepoTags", [])] + + assert any(base_tag in (tag or "") for tag in image_tags) + assert any(env_tag in (tag or "") for tag in image_tags) + assert any(instance_tag in (tag or "") for tag in image_tags) + + # Delete only instance image + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + await local_client._docker.images.delete(instance_tag, force=True) # Second build - base and env should be cached task_setup2 = TaskSetup( @@ -1133,17 +1090,16 @@ async def test_multi_stage_build_caching( ) async with manager.setup_batch([task_setup2]): - # Verify all images exist again (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - images = await local_client._docker.images.list() - image_tags = [tag for img in images for tag in img.get("RepoTags", [])] + # Verify all images exist again + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + images = await local_client._docker.images.list() + image_tags = [tag for img in images for tag in 
img.get("RepoTags", [])] - assert any(base_tag in (tag or "") for tag in image_tags) - assert any(env_tag in (tag or "") for tag in image_tags) - assert any(instance_tag in (tag or "") for tag in image_tags) + assert any(base_tag in (tag or "") for tag in image_tags) + assert any(env_tag in (tag or "") for tag in image_tags) + assert any(instance_tag in (tag or "") for tag in image_tags) # Verify functionality async with manager.setup_task(task_setup2) as sandbox_state: @@ -1155,22 +1111,20 @@ async def test_multi_stage_build_caching( assert result.stdout is not None assert "Instance ready" in result.stdout - # Cleanup (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - for tag in [base_tag, env_tag, instance_tag]: - try: - await local_client._docker.images.delete(tag, force=True) - except Exception: # noqa: PERF203 - pass + # Cleanup + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + for tag in [base_tag, env_tag, instance_tag]: + try: + await local_client._docker.images.delete(tag, force=True) + except Exception: # noqa: PERF203 + pass async def test_shared_base_and_env_stages( self, multistage_test_images: list[str], docker_client_type: str, - skip_if_des_unavailable, docker_client: AbstractDockerClient | None, create_manager_config, ): @@ -1251,33 +1205,31 @@ async def test_shared_base_and_env_stages( ), ] - # Cleanup (only for local) - if docker_client_type == "local": + # Cleanup + assert docker_client is not None + local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + for tag in [base_tag, env_tag, instance1_tag, instance2_tag]: + try: + await local_client._docker.images.delete(tag, force=True) + except Exception: # noqa: PERF203 + pass + + async with manager.setup_batch(task_setups): + # Verify all images created assert docker_client is 
not None local_client = docker_client assert isinstance(local_client, LocalDockerClient) - for tag in [base_tag, env_tag, instance1_tag, instance2_tag]: - try: - await local_client._docker.images.delete(tag, force=True) - except Exception: # noqa: PERF203 - pass + images = await local_client._docker.images.list() + image_tags = [tag for img in images for tag in img.get("RepoTags", [])] - async with manager.setup_batch(task_setups): - # Verify all images created (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - images = await local_client._docker.images.list() - image_tags = [tag for img in images for tag in img.get("RepoTags", [])] + # Base and env should exist (built once) + assert any(base_tag in (tag or "") for tag in image_tags) + assert any(env_tag in (tag or "") for tag in image_tags) - # Base and env should exist (built once) - assert any(base_tag in (tag or "") for tag in image_tags) - assert any(env_tag in (tag or "") for tag in image_tags) - - # Both instances should exist - assert any(instance1_tag in (tag or "") for tag in image_tags) - assert any(instance2_tag in (tag or "") for tag in image_tags) + # Both instances should exist + assert any(instance1_tag in (tag or "") for tag in image_tags) + assert any(instance2_tag in (tag or "") for tag in image_tags) # Verify both work async with manager.setup_task(task_setups[0]) as state1: @@ -1294,16 +1246,15 @@ async def test_shared_base_and_env_stages( ) assert result.exit_code == 0 - # Cleanup (only for local) - if docker_client_type == "local": - assert docker_client is not None - local_client = docker_client - assert isinstance(local_client, LocalDockerClient) - for tag in [base_tag, env_tag, instance1_tag, instance2_tag]: - try: - await local_client._docker.images.delete(tag, force=True) - except Exception: # noqa: PERF203 - pass + # Cleanup + assert docker_client is not None + 
local_client = docker_client + assert isinstance(local_client, LocalDockerClient) + for tag in [base_tag, env_tag, instance1_tag, instance2_tag]: + try: + await local_client._docker.images.delete(tag, force=True) + except Exception: # noqa: PERF203 + pass # ==================== Stdin Handling Tests ==================== @@ -1317,7 +1268,6 @@ async def test_exec_with_stdin_string( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with stdin as string.""" @@ -1353,7 +1303,6 @@ async def test_exec_with_stdin_bytes( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with stdin as bytes.""" @@ -1390,7 +1339,6 @@ async def test_exec_with_stdin_special_characters( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with stdin containing special characters.""" @@ -1431,7 +1379,6 @@ async def test_exec_with_stdin_multiline( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with multiline stdin.""" @@ -1471,7 +1418,6 @@ async def test_exec_with_stdin_piped_to_command( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test stdin being properly piped to a command that processes it.""" @@ -1509,7 +1455,6 @@ async def test_exec_with_stdin_empty_string( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with empty stdin.""" @@ -1544,7 +1489,6 @@ async def test_exec_with_stdin_large_input( self, test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with large stdin input.""" @@ -1581,7 +1525,6 @@ async def test_exec_with_stdin_and_environment( self, test_image: str, 
docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ): """Test executing a command with both stdin and environment variables.""" From ad86e1a3fe53dd42658896c038032c5f93918f83 Mon Sep 17 00:00:00 2001 From: Ivan Evtimov Date: Wed, 10 Dec 2025 07:05:57 -0500 Subject: [PATCH 3/5] address other pull request comments --- docs/docker_backends.md | 6 ----- src/prompt_siren/registry_base.py | 7 ++--- .../docker/plugins/abstract.py | 27 +++---------------- .../sandbox_managers/docker/plugins/errors.py | 2 +- tests/integration/conftest.py | 9 ------- .../test_swebench_tools_integration.py | 1 - 6 files changed, 7 insertions(+), 45 deletions(-) diff --git a/docs/docker_backends.md b/docs/docker_backends.md index b853fdc..e23fe03 100644 --- a/docs/docker_backends.md +++ b/docs/docker_backends.md @@ -147,12 +147,6 @@ Local Docker is implemented as a plugin using the Docker client registry system. See [custom_components.md](custom_components.md) for details on creating custom plugins. -## Performance Considerations - -### Local Docker -- **Startup time**: Fast (~1-2 seconds per container) -- **Execution**: No network latency -- **Best for**: Development, testing, small datasets ## See Also diff --git a/src/prompt_siren/registry_base.py b/src/prompt_siren/registry_base.py index f8dc8bd..bb1f21a 100644 --- a/src/prompt_siren/registry_base.py +++ b/src/prompt_siren/registry_base.py @@ -212,7 +212,6 @@ def create_component( component_type: str, config: BaseModel | None, context: ContextT | None = None, - **kwargs: Any, ) -> ComponentT: """Create a component instance for a given component type and config. @@ -220,7 +219,6 @@ def create_component( component_type: String identifier for the component type config: Configuration object for the component, or None context: Optional context object passed to factory (e.g., sandbox_manager for datasets) - **kwargs: Additional keyword arguments passed to the factory function. 
Returns: An instance of the component type @@ -245,14 +243,13 @@ def create_component( config_class, factory = self._registry[component_type] - # Component doesn't use config - call factory with only kwargs if config_class is None: if config is not None: raise ValueError( f"{self._component_name.title()} type '{component_type}' doesn't accept config, " f"but config was provided" ) - return factory(**kwargs) + return factory() # Component uses config - validate and call with config and context if config is None: @@ -262,7 +259,7 @@ def create_component( f"Config must be an instance of {config_class.__name__}, got {type(config).__name__}" ) - return factory(config, context, **kwargs) + return factory(config, context) def get_registered_components(self) -> list[str]: """Get a list of all registered component types. diff --git a/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py b/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py index 1c3cc7b..ea39b86 100644 --- a/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py +++ b/src/prompt_siren/sandbox_managers/docker/plugins/abstract.py @@ -6,32 +6,27 @@ from __future__ import annotations -from abc import ABC, abstractmethod from collections.abc import AsyncIterator -from typing import Any +from typing import Any, Protocol from ...abstract import ExecOutput -class AbstractContainer(ABC): +class AbstractContainer(Protocol): """Abstract interface for Docker containers.""" - @abstractmethod async def start(self) -> None: """Start the container.""" ... - @abstractmethod async def stop(self) -> None: """Stop the container.""" ... - @abstractmethod async def delete(self) -> None: """Delete the container.""" ... - @abstractmethod async def show(self) -> dict[str, Any]: """Get container details. @@ -40,7 +35,6 @@ async def show(self) -> dict[str, Any]: """ ... - @abstractmethod async def exec( self, cmd: list[str], @@ -65,7 +59,6 @@ async def exec( """ ... 
- @abstractmethod async def log(self, stdout: bool, stderr: bool) -> list[str]: """Get container logs. @@ -78,7 +71,6 @@ async def log(self, stdout: bool, stderr: bool) -> list[str]: """ ... - @abstractmethod async def commit(self, repository: str, tag: str) -> None: """Commit container to an image. @@ -89,10 +81,9 @@ async def commit(self, repository: str, tag: str) -> None: ... -class AbstractNetwork(ABC): +class AbstractNetwork(Protocol): """Abstract interface for Docker networks.""" - @abstractmethod async def show(self) -> dict[str, Any]: """Get network details. @@ -101,27 +92,24 @@ async def show(self) -> dict[str, Any]: """ ... - @abstractmethod async def delete(self) -> None: """Delete the network.""" ... -class AbstractDockerClient(ABC): +class AbstractDockerClient(Protocol): """Abstract Docker client interface. This interface provides all Docker operations needed by the workbench, abstracting away the underlying implementation. """ - @abstractmethod async def close(self) -> None: """Close the client and clean up resources.""" ... # Image operations - @abstractmethod async def inspect_image(self, tag: str) -> dict[str, Any]: """Inspect an image. @@ -136,7 +124,6 @@ async def inspect_image(self, tag: str) -> dict[str, Any]: """ ... - @abstractmethod async def pull_image(self, tag: str) -> None: """Pull an image from registry. @@ -145,7 +132,6 @@ async def pull_image(self, tag: str) -> None: """ ... - @abstractmethod async def build_image( self, context_path: str, @@ -171,7 +157,6 @@ async def build_image( yield {} # type: ignore[unreachable] ... - @abstractmethod async def delete_image(self, tag: str, force: bool = False) -> None: """Delete an image. @@ -183,7 +168,6 @@ async def delete_image(self, tag: str, force: bool = False) -> None: # Container operations - @abstractmethod async def create_container(self, config: dict[str, Any], name: str) -> AbstractContainer: """Create a container. 
@@ -196,7 +180,6 @@ async def create_container(self, config: dict[str, Any], name: str) -> AbstractC """ ... - @abstractmethod async def get_container(self, container_id: str) -> AbstractContainer: """Get a container by ID. @@ -210,7 +193,6 @@ async def get_container(self, container_id: str) -> AbstractContainer: # Network operations - @abstractmethod async def create_network(self, config: dict[str, Any]) -> AbstractNetwork: """Create a network. @@ -222,7 +204,6 @@ async def create_network(self, config: dict[str, Any]) -> AbstractNetwork: """ ... - @abstractmethod async def get_network(self, network_id: str) -> AbstractNetwork: """Get a network by ID. diff --git a/src/prompt_siren/sandbox_managers/docker/plugins/errors.py b/src/prompt_siren/sandbox_managers/docker/plugins/errors.py index 33d4481..81feb6e 100644 --- a/src/prompt_siren/sandbox_managers/docker/plugins/errors.py +++ b/src/prompt_siren/sandbox_managers/docker/plugins/errors.py @@ -10,7 +10,7 @@ class DockerClientError(Exception): operation fails. """ - def __init__(self, message: str, stdout: str = "", stderr: str = ""): + def __init__(self, message: str, stdout: str | None = None, stderr: str | None = None): self.stdout = stdout self.stderr = stderr super().__init__(f"{message}\nstdout: {stdout}\nstderr: {stderr}") diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 75a0a25..87dc92f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -47,15 +47,6 @@ def docker_client_type() -> str: return "local" -@pytest.fixture(scope="module") -def skip_if_des_unavailable(docker_client_type: str): - """Skip test if requested Docker client is unavailable. - - This is a no-op for external tests (always local). - Kept for compatibility with shared test code. - """ - - @pytest.fixture(scope="module") def create_manager_config() -> ManagerConfigFactory: """Factory fixture for creating DockerSandboxConfig. 
diff --git a/tests/integration/test_swebench_tools_integration.py b/tests/integration/test_swebench_tools_integration.py index d4a639d..45b5396 100644 --- a/tests/integration/test_swebench_tools_integration.py +++ b/tests/integration/test_swebench_tools_integration.py @@ -43,7 +43,6 @@ async def sandbox_manager( test_image: str, docker_client_type: str, - skip_if_des_unavailable, create_manager_config, ) -> AsyncIterator[tuple[AbstractSandboxManager, str, TaskSetup]]: """Create a sandbox manager for tool testing. From 1a3180a12601a4405a3f3e8c5ea17bef8770f5a9 Mon Sep 17 00:00:00 2001 From: Ivan Evtimov Date: Wed, 10 Dec 2025 07:59:44 -0500 Subject: [PATCH 4/5] restore the FIXTURES path --- tests/integration/test_docker_manager_integration.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_docker_manager_integration.py b/tests/integration/test_docker_manager_integration.py index 02a4fc2..cccf774 100644 --- a/tests/integration/test_docker_manager_integration.py +++ b/tests/integration/test_docker_manager_integration.py @@ -38,8 +38,7 @@ pytestmark = pytest.mark.anyio # Path to test fixtures -FIXTURES_DIR = resources.files("tests.integration.fixtures") - +FIXTURES_DIR = Path(__file__).parent / "fixtures" # ==================== Shared Fixtures for Container Reuse ==================== From b3d1bd6533682fa5c1486449e4d1b0cefa6122fa Mon Sep 17 00:00:00 2001 From: Ivan Evtimov Date: Wed, 10 Dec 2025 09:27:43 -0500 Subject: [PATCH 5/5] fix ruff error --- tests/integration/test_docker_manager_integration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_docker_manager_integration.py b/tests/integration/test_docker_manager_integration.py index cccf774..1e795cd 100644 --- a/tests/integration/test_docker_manager_integration.py +++ b/tests/integration/test_docker_manager_integration.py @@ -10,7 +10,6 @@ import asyncio from collections.abc import AsyncIterator -from importlib import resources from pathlib import 
Path from uuid import uuid4