-
Notifications
You must be signed in to change notification settings - Fork 18
Add patch for logsmith-easy_500 #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,231 @@ | ||||||
| #!/usr/bin/env python3 | ||||||
| """ | ||||||
| Patch logsmith-easy-500 tasks to: | ||||||
| 1. Move Dockerfile into environment/Dockerfile (expected by validate_and_upload_from_hf.py) | ||||||
| 2. Move test.sh into tests/test.sh (expected by harbor) | ||||||
| 3. Remove COPY seeds/ from the Dockerfile (reduces 500 unique snapshots to 1) | ||||||
| 4. Copy seeds/ into setup_files/ so Harbor uploads data to /workspace/ before agent runs | ||||||
|
|
||||||
| Original flat task structure: | ||||||
| task_000001/ | ||||||
| Dockerfile | ||||||
| instruction.md | ||||||
| task.json | ||||||
| task.toml | ||||||
| test.sh | ||||||
| seeds/ | ||||||
| data/ | ||||||
| logs/ | ||||||
| decoys/ | ||||||
|
|
||||||
| Patched structure: | ||||||
| task_000001/ | ||||||
| instruction.md | ||||||
| task.json | ||||||
| task.toml | ||||||
| environment/ | ||||||
| Dockerfile <- moved here, COPY seeds/ removed | ||||||
| tests/ | ||||||
| test.sh <- moved here | ||||||
| setup_files/ | ||||||
| data/ <- copied from seeds/data/ for Harbor upload | ||||||
| logs/ | ||||||
| decoys/ | ||||||
| seeds/ <- left in place (untouched) | ||||||
|
|
||||||
| Usage: | ||||||
| python patch_logsmith_tasks.py /path/to/tasks | ||||||
|
|
||||||
| # Write to a separate output directory (leaves originals untouched) | ||||||
| python patch_logsmith_tasks.py /path/to/tasks --output-dir /path/to/patched | ||||||
|
|
||||||
| # Dry run | ||||||
| python patch_logsmith_tasks.py /path/to/tasks --dry-run | ||||||
| """ | ||||||
|
|
||||||
| from __future__ import annotations | ||||||
|
|
||||||
| import argparse | ||||||
| import shutil | ||||||
| from pathlib import Path | ||||||
|
|
||||||
|
|
||||||
| # --------------------------------------------------------------------------- | ||||||
| # Templates | ||||||
| # --------------------------------------------------------------------------- | ||||||
|
|
||||||
| DOCKERFILE_TEMPLATE = """\ | ||||||
| FROM ubuntu:22.04 | ||||||
| ENV DEBIAN_FRONTEND=noninteractive LC_ALL=C.UTF-8 LANG=C.UTF-8 TZ=UTC LOG_DIR=/logs/verifier | ||||||
| WORKDIR /workspace | ||||||
|
|
||||||
| RUN apt-get update \\ | ||||||
| && apt-get install -y --no-install-recommends tmux=3.2a-4ubuntu0.2 \\ | ||||||
| && rm -rf /var/lib/apt/lists/* | ||||||
|
|
||||||
| # Seed data is uploaded to /workspace/ by Harbor before the agent runs | ||||||
| # (via setup_files/ mechanism) — no COPY needed here. | ||||||
|
|
||||||
| # Non-root user (safer by default). | ||||||
| RUN useradd -m -u 1000 agent \\ | ||||||
| && mkdir -p /logs/verifier /output \\ | ||||||
| && chown -R agent:agent /workspace /logs /output | ||||||
| USER agent | ||||||
| """ | ||||||
|
|
||||||
|
|
||||||
| # --------------------------------------------------------------------------- | ||||||
| # Patching logic | ||||||
| # --------------------------------------------------------------------------- | ||||||
|
|
||||||
| def patch_task( | ||||||
| task_dir: Path, | ||||||
| output_dir: Path | None = None, | ||||||
| dry_run: bool = False, | ||||||
| ) -> dict[str, bool | str]: | ||||||
| """Patch a single task directory. Returns dict of what was changed.""" | ||||||
| changes: dict[str, bool | str] = {} | ||||||
|
|
||||||
| # Validate expected flat structure | ||||||
| dockerfile_src = task_dir / "Dockerfile" | ||||||
| test_sh_src = task_dir / "test.sh" | ||||||
| seeds_dir = task_dir / "seeds" | ||||||
|
|
||||||
| if not dockerfile_src.exists(): | ||||||
| return {"error": True, "reason": "no Dockerfile"} | ||||||
| if not seeds_dir.exists(): | ||||||
| return {"error": True, "reason": "no seeds/ dir"} | ||||||
|
|
||||||
| # Determine target directory | ||||||
| if output_dir: | ||||||
| target = output_dir / task_dir.name | ||||||
| if not dry_run: | ||||||
| if target.exists(): | ||||||
| shutil.rmtree(target) | ||||||
| shutil.copytree(task_dir, target) | ||||||
| else: | ||||||
| target = task_dir | ||||||
|
|
||||||
| # --- 1. environment/Dockerfile: move + replace with generic version --- | ||||||
| env_dir = target / "environment" | ||||||
| target_dockerfile = env_dir / "Dockerfile" | ||||||
|
|
||||||
| if dry_run: | ||||||
| changes["environment/Dockerfile"] = True | ||||||
| else: | ||||||
| env_dir.mkdir(parents=True, exist_ok=True) | ||||||
| target_dockerfile.write_text(DOCKERFILE_TEMPLATE) | ||||||
| # Remove the old flat Dockerfile | ||||||
| old_dockerfile = target / "Dockerfile" | ||||||
| if old_dockerfile.exists(): | ||||||
| old_dockerfile.unlink() | ||||||
| # Remove any seeds/ that got copied under environment/ by shutil.copytree | ||||||
| env_seeds = env_dir / "seeds" | ||||||
| if env_seeds.exists(): | ||||||
| shutil.rmtree(env_seeds) | ||||||
|
Comment on lines
+122
to
+125
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This block of code, which removes |
||||||
| changes["environment/Dockerfile"] = True | ||||||
|
|
||||||
| # --- 2. tests/test.sh: move test.sh into tests/ subdir --- | ||||||
| tests_dir = target / "tests" | ||||||
| target_test_sh = tests_dir / "test.sh" | ||||||
| old_test_sh = target / "test.sh" | ||||||
|
|
||||||
| if target_test_sh.exists(): | ||||||
| changes["tests/test.sh"] = False # already moved | ||||||
| elif dry_run: | ||||||
| changes["tests/test.sh"] = bool(test_sh_src.exists()) | ||||||
| else: | ||||||
| if old_test_sh.exists(): | ||||||
| tests_dir.mkdir(parents=True, exist_ok=True) | ||||||
| shutil.move(str(old_test_sh), str(target_test_sh)) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The explicit conversion of
Suggested change
|
||||||
| changes["tests/test.sh"] = True | ||||||
| else: | ||||||
| changes["tests/test.sh"] = False | ||||||
|
|
||||||
| # --- 3. setup_files/: copy seeds/data/ so Harbor uploads it --- | ||||||
| # Harbor uploads setup_files/ contents to /workspace/ before the agent | ||||||
| # runs. We copy seeds/data/ -> setup_files/data/ so the workspace layout | ||||||
| # the agent sees is identical to the original (data/ at /workspace/data/). | ||||||
| target_seeds_data = target / "seeds" / "data" | ||||||
| target_setup_files = target / "setup_files" | ||||||
| already_patched = target_setup_files.exists() and any(target_setup_files.iterdir()) | ||||||
|
|
||||||
| if already_patched: | ||||||
| changes["setup_files"] = False | ||||||
| elif dry_run: | ||||||
| changes["setup_files"] = True | ||||||
| else: | ||||||
| target_setup_files.mkdir(parents=True, exist_ok=True) | ||||||
| if target_seeds_data.exists(): | ||||||
| shutil.copytree(target_seeds_data, target_setup_files / "data") | ||||||
| changes["setup_files"] = True | ||||||
|
|
||||||
| return changes | ||||||
|
|
||||||
|
|
||||||
| def main(): | ||||||
| parser = argparse.ArgumentParser( | ||||||
| description="Patch logsmith tasks: restructure to environment/Dockerfile + tests/test.sh, remove COPY seeds/", | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "tasks_dir", help="Root directory containing task folders") | ||||||
| parser.add_argument( | ||||||
| "--output-dir", | ||||||
| type=Path, | ||||||
| default=None, | ||||||
| help="Write patched tasks to this directory (default: patch in-place)", | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "--dry-run", | ||||||
| action="store_true", | ||||||
| help="Show what would change without writing", | ||||||
| ) | ||||||
| args = parser.parse_args() | ||||||
|
|
||||||
| tasks_root = Path(args.tasks_dir) | ||||||
| if not tasks_root.is_dir(): | ||||||
| raise SystemExit(f"Not a directory: {tasks_root}") | ||||||
|
|
||||||
| task_dirs = sorted( | ||||||
| d for d in tasks_root.iterdir() | ||||||
| if d.is_dir() and (d / "instruction.md").exists() | ||||||
| ) | ||||||
| print(f"Found {len(task_dirs)} tasks in {tasks_root}") | ||||||
|
|
||||||
| if args.output_dir and not args.dry_run: | ||||||
| args.output_dir.mkdir(parents=True, exist_ok=True) | ||||||
| print(f"Output directory: {args.output_dir}") | ||||||
|
|
||||||
| totals: dict[str, int] = {} | ||||||
| errors = 0 | ||||||
| for td in task_dirs: | ||||||
| changes = patch_task( | ||||||
| td, output_dir=args.output_dir, dry_run=args.dry_run) | ||||||
| if changes.get("error"): | ||||||
| errors += 1 | ||||||
| print(f" ERROR {td.name}: {changes.get('reason')}") | ||||||
| continue | ||||||
| for k, v in changes.items(): | ||||||
| if v: | ||||||
| totals[k] = totals.get(k, 0) + 1 | ||||||
|
|
||||||
| action = "Would patch" if args.dry_run else "Patched" | ||||||
| print(f"\n{action}:") | ||||||
| for filename, count in sorted(totals.items()): | ||||||
| print(f" {filename}: {count}/{len(task_dirs)}") | ||||||
| if errors: | ||||||
| print(f" Errors: {errors}") | ||||||
|
|
||||||
| # Report unique Dockerfiles after patching | ||||||
| if not args.dry_run: | ||||||
| out_root = args.output_dir or tasks_root | ||||||
| dockerfiles: set[str] = set() | ||||||
| for td in sorted(out_root.iterdir()): | ||||||
| df = td / "environment" / "Dockerfile" | ||||||
| if df.exists(): | ||||||
| dockerfiles.add(df.read_text()) | ||||||
| print(f"\nUnique Dockerfiles: {len(dockerfiles)}") | ||||||
|
|
||||||
|
|
||||||
| if __name__ == "__main__": | ||||||
| main() | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using a dictionary to return error states mixes normal results with error information. A more Pythonic approach is to raise a custom exception for exceptional cases like a missing
Dockerfile. This separates error handling from the main logic and improves code clarity.You could define
class PatchTaskError(Exception): passand then raise it here. The caller inmainwould then use atry...except PatchTaskErrorblock.