Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 231 additions & 0 deletions data/patchers/patch_logsmith-easy_500.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""
Patch logsmith-easy-500 tasks to:
1. Move Dockerfile into environment/Dockerfile (expected by validate_and_upload_from_hf.py)
2. Move test.sh into tests/test.sh (expected by harbor)
3. Remove COPY seeds/ from the Dockerfile (reduces 500 unique snapshots to 1)
4. Copy seeds/ into setup_files/ so Harbor uploads data to /workspace/ before agent runs

Original flat task structure:
task_000001/
Dockerfile
instruction.md
task.json
task.toml
test.sh
seeds/
data/
logs/
decoys/

Patched structure:
task_000001/
instruction.md
task.json
task.toml
environment/
Dockerfile <- moved here, COPY seeds/ removed
tests/
test.sh <- moved here
setup_files/
data/ <- copied from seeds/data/ for Harbor upload
logs/
decoys/
seeds/ <- left in place (untouched)

Usage:
python patch_logsmith_tasks.py /path/to/tasks

# Write to a separate output directory (leaves originals untouched)
python patch_logsmith_tasks.py /path/to/tasks --output-dir /path/to/patched

# Dry run
python patch_logsmith_tasks.py /path/to/tasks --dry-run
"""

from __future__ import annotations

import argparse
import shutil
from pathlib import Path


# ---------------------------------------------------------------------------
# Templates
# ---------------------------------------------------------------------------

# Generic Dockerfile written into every patched task's environment/ directory.
# Deliberately contains no task-specific COPY, so all tasks share one image
# snapshot; per-task seed data is delivered via setup_files/ instead.
DOCKERFILE_TEMPLATE = """\
FROM ubuntu:22.04
ENV DEBIAN_FRONTEND=noninteractive LC_ALL=C.UTF-8 LANG=C.UTF-8 TZ=UTC LOG_DIR=/logs/verifier
WORKDIR /workspace

RUN apt-get update \\
&& apt-get install -y --no-install-recommends tmux=3.2a-4ubuntu0.2 \\
&& rm -rf /var/lib/apt/lists/*

# Seed data is uploaded to /workspace/ by Harbor before the agent runs
# (via setup_files/ mechanism) — no COPY needed here.

# Non-root user (safer by default).
RUN useradd -m -u 1000 agent \\
&& mkdir -p /logs/verifier /output \\
&& chown -R agent:agent /workspace /logs /output
USER agent
"""


# ---------------------------------------------------------------------------
# Patching logic
# ---------------------------------------------------------------------------

def patch_task(
    task_dir: Path,
    output_dir: Path | None = None,
    dry_run: bool = False,
) -> dict[str, bool | str]:
    """Patch a single flat task directory into the Harbor layout.

    Performs three restructurings:
      1. ``environment/Dockerfile`` — written from the generic template;
         the old flat ``Dockerfile`` is removed.
      2. ``tests/test.sh``          — moved from the flat ``test.sh``.
      3. ``setup_files/data/``      — copied from ``seeds/data/`` so Harbor
         uploads it to /workspace/ before the agent runs.

    Args:
        task_dir: Flat task directory; must contain ``Dockerfile`` and
            ``seeds/``.
        output_dir: If given, the task is first copied under this directory
            and the copy is patched, leaving the original untouched.
        dry_run: Report what would change without writing anything.

    Returns:
        Mapping of change name -> whether it was applied. On a structure
        problem returns ``{"error": True, "reason": ...}`` instead; callers
        are expected to check ``changes.get("error")``.
    """
    changes: dict[str, bool | str] = {}

    # Validate the expected flat input structure before touching anything.
    dockerfile_src = task_dir / "Dockerfile"
    test_sh_src = task_dir / "test.sh"
    seeds_dir = task_dir / "seeds"

    if not dockerfile_src.exists():
        return {"error": True, "reason": "no Dockerfile"}
    if not seeds_dir.exists():
        return {"error": True, "reason": "no seeds/ dir"}

    # Determine target directory: a fresh copy under output_dir, or the
    # task directory itself for in-place patching.
    if output_dir:
        target = output_dir / task_dir.name
        if not dry_run:
            if target.exists():
                shutil.rmtree(target)
            shutil.copytree(task_dir, target)
    else:
        target = task_dir

    # --- 1. environment/Dockerfile: move + replace with generic version ---
    env_dir = target / "environment"
    target_dockerfile = env_dir / "Dockerfile"

    if dry_run:
        changes["environment/Dockerfile"] = True
    else:
        env_dir.mkdir(parents=True, exist_ok=True)
        target_dockerfile.write_text(DOCKERFILE_TEMPLATE)
        # Remove the old flat Dockerfile now that the template is in place.
        # (No environment/seeds/ cleanup is needed: environment/ is created
        # here, after any copytree, so stray seeds cannot exist under it.)
        old_dockerfile = target / "Dockerfile"
        if old_dockerfile.exists():
            old_dockerfile.unlink()
        changes["environment/Dockerfile"] = True

    # --- 2. tests/test.sh: move test.sh into tests/ subdir ---
    tests_dir = target / "tests"
    target_test_sh = tests_dir / "test.sh"
    old_test_sh = target / "test.sh"

    if target_test_sh.exists():
        changes["tests/test.sh"] = False  # already moved on a prior run
    elif dry_run:
        changes["tests/test.sh"] = bool(test_sh_src.exists())
    else:
        if old_test_sh.exists():
            tests_dir.mkdir(parents=True, exist_ok=True)
            # shutil.move accepts path-like objects directly (Python 3.9+),
            # so no str() conversion is needed.
            shutil.move(old_test_sh, target_test_sh)
            changes["tests/test.sh"] = True
        else:
            changes["tests/test.sh"] = False

    # --- 3. setup_files/: copy seeds/data/ so Harbor uploads it ---
    # Harbor uploads setup_files/ contents to /workspace/ before the agent
    # runs. We copy seeds/data/ -> setup_files/data/ so the workspace layout
    # the agent sees is identical to the original (data/ at /workspace/data/).
    target_seeds_data = target / "seeds" / "data"
    target_setup_files = target / "setup_files"
    already_patched = target_setup_files.exists() and any(target_setup_files.iterdir())

    if already_patched:
        changes["setup_files"] = False
    elif dry_run:
        changes["setup_files"] = True
    else:
        target_setup_files.mkdir(parents=True, exist_ok=True)
        if target_seeds_data.exists():
            shutil.copytree(target_seeds_data, target_setup_files / "data")
        changes["setup_files"] = True

    return changes


def main():
    """CLI entry point: patch every task folder under ``tasks_dir``.

    A subdirectory counts as a task when it contains ``instruction.md``.
    Prints a per-change summary and, unless ``--dry-run``, the number of
    unique Dockerfiles produced (the goal of the patch is exactly 1).
    """
    parser = argparse.ArgumentParser(
        description="Patch logsmith tasks: restructure to environment/Dockerfile + tests/test.sh, remove COPY seeds/",
    )
    parser.add_argument(
        "tasks_dir", help="Root directory containing task folders")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=None,
        help="Write patched tasks to this directory (default: patch in-place)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would change without writing",
    )
    args = parser.parse_args()

    tasks_root = Path(args.tasks_dir)
    if not tasks_root.is_dir():
        raise SystemExit(f"Not a directory: {tasks_root}")

    task_dirs = sorted(
        d for d in tasks_root.iterdir()
        if d.is_dir() and (d / "instruction.md").exists()
    )
    print(f"Found {len(task_dirs)} tasks in {tasks_root}")

    if args.output_dir and not args.dry_run:
        args.output_dir.mkdir(parents=True, exist_ok=True)
        print(f"Output directory: {args.output_dir}")

    # Tally, per change name, how many tasks it was applied to.
    totals: dict[str, int] = {}
    errors = 0
    for td in task_dirs:
        changes = patch_task(
            td, output_dir=args.output_dir, dry_run=args.dry_run)
        if changes.get("error"):
            errors += 1
            print(f"  ERROR {td.name}: {changes.get('reason')}")
            continue
        for k, v in changes.items():
            if v:
                totals[k] = totals.get(k, 0) + 1

    action = "Would patch" if args.dry_run else "Patched"
    print(f"\n{action}:")
    for filename, count in sorted(totals.items()):
        # Bug fix: the summary previously printed a literal "(unknown)"
        # placeholder instead of the change key being summarized.
        print(f"  {filename}: {count}/{len(task_dirs)}")
    if errors:
        print(f"  Errors: {errors}")

    # Report unique Dockerfiles after patching — verifies the snapshot
    # count collapsed as intended.
    if not args.dry_run:
        out_root = args.output_dir or tasks_root
        dockerfiles: set[str] = set()
        for td in sorted(out_root.iterdir()):
            df = td / "environment" / "Dockerfile"
            if df.exists():
                dockerfiles.add(df.read_text())
        print(f"\nUnique Dockerfiles: {len(dockerfiles)}")


# Standard script entry guard: run the patcher only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()