# NOTE(review): This artifact was recovered as a git patch whose newlines were
# collapsed and whose angle-bracket tags were stripped (the tag-extraction
# regexes and the prompt "OUTPUT FORMAT" markers were destroyed). Reconstructed
# below as the four new Python files the patch introduced, in patch order, with
# the harness tags restored as <test_sh>/<test_state_py> — TODO confirm the
# original tag names against the authoring prompt actually deployed.

# ============================================================================
# file: data/inferredbugs/generate_with_verifier.py (new file)
# ============================================================================
#!/usr/bin/env python3
"""
Generate verified InferredBugs dataset by adding LLM-authored verifiers to an
existing tasks dataset.
"""

import argparse
import re
import sys
from pathlib import Path

# Add project root to sys.path so sibling packages resolve when run as a script.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Import from parent package
from data.commons import (
    upload_tasks_to_hf,
    download_hf_dataset,
)
from scripts.harbor import tasks_parquet_converter as tpc
from data.inferredbugs.structural_verifier import inject_inferredbugs_verifier

# Patterns locating the target file path inside an instruction.md.
# Hoisted to module level (the original re-imported `re` inside the loop).
_FILE_BOLD_RE = re.compile(r"\*\*File:\*\* `(.*?)`")
_FILE_HEADER_RE = re.compile(r"###\s*File:\s*\n(.*)")


def update_instructions_with_requirement(dataset_dir: str) -> None:
    """Append the Deliverable Requirement to each task's instruction.md.

    This ensures the agent knows where to save the fix so the structural
    verifier can find it. Idempotent: tasks whose instructions already
    contain a "Deliverable Requirement" section are left untouched.
    """
    tasks_root = Path(dataset_dir)
    print(f"Updating instructions in: {tasks_root}")

    for task_dir in sorted(tasks_root.iterdir()):
        if not task_dir.is_dir():
            continue

        instr_path = task_dir / "instruction.md"
        if not instr_path.exists():
            continue
        content = instr_path.read_text()

        # Simple heuristic to find the target file from the instruction text.
        file_match = _FILE_BOLD_RE.search(content) or _FILE_HEADER_RE.search(content)
        if not file_match:
            continue

        target_file = file_match.group(1).strip()
        # BUG FIX: rsplit('/', 1)[0] on a bare filename returned the filename
        # itself, producing `mkdir -p /app/<filename>`; guard for that case.
        target_dir = target_file.rsplit("/", 1)[0] if "/" in target_file else "."
        requirement = f"""

## Deliverable Requirement
Write the complete, corrected version of the file to: `/app/{target_file}`.
The directory does not exist yet. You must create it and write the file:
```bash
mkdir -p /app/{target_dir}
cat > /app/{target_file} << 'ENDOFFILE'

ENDOFFILE
```
"""
        if "Deliverable Requirement" not in content:
            instr_path.write_text(content + requirement)


def main() -> None:
    """Download InferredBugs tasks, inject LLM-authored verifiers, upload."""
    parser = argparse.ArgumentParser(description="Generate Verified InferredBugs dataset")
    parser.add_argument("--skip_upload", action="store_true", help="Skip upload to Hugging Face")
    parser.add_argument("--model", type=str, default="gpt-5-nano", help="LLM to use for authoring verifiers")
    parser.add_argument("--limit", type=int, default=None, help="Limit the number of tasks to process")
    args = parser.parse_args()

    source_repo = "mlfoundations-dev/inferredbugs-sandboxes"
    target_repo = "DCAgent/inferredbugs-sandboxes-verifier"

    # 1. Download
    print(f"Step 1: Downloading source tasks from {source_repo}...")
    snapshot_dir = Path(download_hf_dataset(source_repo))

    # 2. Extract tasks
    print("Step 2: Extracting tasks from parquet files...")
    parquet_files = sorted(snapshot_dir.rglob("*.parquet"))
    if not parquet_files:
        raise FileNotFoundError(f"No parquet files found in {snapshot_dir}")

    # Use a FIXED directory to allow for resumption and skip logic.
    output_dir = PROJECT_ROOT / "data" / "inferredbugs" / "workdir"
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Working directory: {output_dir}")

    # Extract only if the directory is empty, to support resumption.
    if not any(output_dir.iterdir()):
        print(f"Extracting tasks to: {output_dir}")
        tpc.from_parquet(
            parquet_path=str(parquet_files[0]),
            base=str(output_dir),
            on_exist="overwrite",
        )
    else:
        print(f"Directory {output_dir} not empty. Skipping extraction to support resumption.")

    # 3. Update Instructions
    print("Step 3: Appending Deliverable Requirements to instructions...")
    update_instructions_with_requirement(str(output_dir))

    # 4. Inject Authored Verifiers
    print(f"Step 4: Authoring and injecting structural verifiers using {args.model}...")
    # Collect the instruction texts again to pass to the authoring engine.
    task_dirs = sorted(d for d in output_dir.iterdir() if d.is_dir())

    # Apply limit if specified.
    if args.limit:
        print(f"Limiting processing to first {args.limit} tasks.")
        task_dirs = task_dirs[: args.limit]

    instructions = []
    for d in task_dirs:
        instr_file = d / "instruction.md"
        instructions.append(instr_file.read_text() if instr_file.exists() else "")

    inject_inferredbugs_verifier(str(output_dir), instructions, model_name=args.model)

    # 5. Upload
    if not args.skip_upload:
        print(f"Step 5: Uploading verified tasks to {target_repo}...")
        upload_tasks_to_hf(str(output_dir), target_repo)
        print(f"Success! Tasks uploaded to: https://huggingface.co/datasets/{target_repo}")
    else:
        print(f"Upload skipped. Local tasks available in: {output_dir}")

    print("Verified InferredBugs Generation Complete!")


if __name__ == "__main__":
    main()


# ============================================================================
# file: data/inferredbugs/structural_verifier.py (new file)
# ============================================================================
"""
Logic for authoring and injecting structural Python verifiers for InferredBugs.
"""

import os
import re
from pathlib import Path
from typing import List, Tuple

# NOTE(review): the <test_sh>/<test_state_py> tag names below are reconstructed;
# the recovered source had its tags stripped. Confirm against the deployed prompt.
AUTHORING_PROMPT_TEMPLATE = """
You are authoring a verification harness for a bug fix task. The harness consists of two files:

- `test.sh`: bash entrypoint — installs ONLY the Python packages actually imported by `test_state.py` (if `test_state.py` only uses stdlib modules like `re`, `os`, `pathlib`, no pip installs are needed), then runs `python3 -u /tests/test_state.py`. Nothing else.
- `test_state.py`: Python script — reads the agent's submitted file, checks whether the bug is fixed, and writes a reward score

The harness must verify whether the bug described in the "Bug Report" below has been correctly fixed, by statically inspecting the source code (no compilation or execution).

================ CORE PRINCIPLES ================
1. Analyze Structure, Not Runtime: The environment lacks the .NET SDK. Use Python's `re` module or string parsing to inspect the source code files directly.
2. Target File: The bug report specifies the target file path. Read this file from the candidate paths (relative and `/app/` prefixed).
3. Analyze the Bug First: Read the "Before (Buggy File)" section in the bug report. Identify the exact buggy pattern and what class of fixes would address it. Document this in a comment in your verifier code.
4. The Contract: The Python script MUST write a scalar score (1.0 for success, 0.0 for failure) to the file `/logs/verifier/reward.txt`.
5. `test.sh` contains ONLY: environment setup (apt-get, pip install) and the single command `python3 -u /tests/test_state.py`. Nothing else — no Python code, no heredocs, no file creation, no cat commands.
6. `test_state.py` contains ALL verification logic: reading the target file, extracting the method body, checking the fix, and writing the reward. The target file is written by the agent being evaluated — do NOT create or modify it in either script.

================ VERIFICATION QUALITY RULES ================
These rules are CRITICAL. Violating them produces a useless verifier.

7. Scope checks to the specific buggy method/function named in the bug report. Do NOT check the entire file — patterns that already exist elsewhere in the file will cause false positives on unfixed code.

8. Design a check that would return 0.0 on the ORIGINAL buggy code and 1.0 on any correct fix. You decide the best strategy — presence of a fix construct, absence of a bug pattern, or a combination — whichever is most reliable for this specific bug type and language. Before finalizing, mentally run your check against the original buggy code shown in the bug report: if it would return 1.0, revise it.

9. Accept multiple valid fix patterns. There is rarely only one correct fix. Your check should pass for any reasonable fix, not just one specific implementation.

10. Use a brace-counting parser to extract the exact method body, not a regex that stops at the first closing brace. The reference example shows a simple brace-counter approach.

================ REQUIRED OUTPUT FORMAT ================
Emit ONLY the two XML blocks below — no prose before, between, or after them.

<test_sh>
test.sh content here
</test_sh>

<test_state_py>
test_state.py content here
</test_state_py>

================ REFERENCE EXAMPLE ================

<test_sh>
#!/bin/bash
# Ensure standard setup
apt-get update > /dev/null 2>&1
apt-get install -y python3-pip > /dev/null 2>&1
# Run the judge with unbuffered output for real-time logs
python3 -u /tests/test_state.py
</test_sh>

<test_state_py>
import re
import os
import sys
import traceback
from pathlib import Path

RELATIVE_TARGET = "src/Storage/Database.cs"
BUGGY_METHOD = "void SaveData("

def extract_method_body(code, method_signature):
    # Brace-counting extractor -- handles nested braces correctly.
    idx = code.find(method_signature)
    if idx == -1:
        return None
    brace_start = code.find('{{', idx)
    if brace_start == -1:
        return None
    depth = 0
    for i in range(brace_start, len(code)):
        if code[i] == '{{':
            depth += 1
        elif code[i] == '}}':
            depth -= 1
            if depth == 0:
                return code[brace_start:i+1]
    return None

def verify():
    reward_file = Path("/logs/verifier/reward.txt")
    reward_file.parent.mkdir(parents=True, exist_ok=True)

    candidate_paths = [Path(RELATIVE_TARGET), Path("/app") / RELATIVE_TARGET]
    target_path = next((p for p in candidate_paths if p.exists()), None)
    if not target_path:
        print(f"ERROR: target file not found. Searched: {{[str(p) for p in candidate_paths]}}")
        reward_file.write_text("0.0")
        print("VERIFIER: FAIL")
        return

    code = target_path.read_text()
    body = extract_method_body(code, BUGGY_METHOD)
    if body is None:
        print("ERROR: buggy method not found in file.")
        reward_file.write_text("0.0")
        print("VERIFIER: FAIL")
        return

    # Check for presence of fix construct (using or finally+Dispose)
    has_using = bool(re.search(r'\\busing\\s*\\(', body))
    has_finally_dispose = bool(re.search(r'finally', body)) and bool(re.search(r'\\.Dispose\\(', body))
    fixed = has_using or has_finally_dispose

    reward_file.write_text("1.0" if fixed else "0.0")
    print("VERIFIER: PASS" if fixed else "VERIFIER: FAIL")

if __name__ == "__main__":
    try:
        verify()
    except Exception:
        traceback.print_exc()
        Path("/logs/verifier/reward.txt").write_text("0.0")
        sys.exit(1)
</test_state_py>

---
### Bug Report to Process:
{instruction}
"""


def author_verifier_harness(instruction: str, task_name: str, model_name: str = "gpt-5-nano") -> Tuple[str, str]:
    """Call the LLM once and return (test.sh content, test_state.py content).

    On any failure the returned scripts start with an "# Error" / error-echo
    sentinel so callers can detect the failure and skip the task.
    """
    try:
        # Imported lazily so the module can be loaded without the SDK installed.
        from openai import OpenAI

        client = OpenAI()
        prompt = AUTHORING_PROMPT_TEMPLATE.format(instruction=instruction)

        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
        )

        # BUG FIX: message.content may be None (e.g. refusals); guard before regex.
        content = response.choices[0].message.content or ""

        # Extract Bash script.
        bash_match = re.search(r"<test_sh>(.*?)</test_sh>", content, re.DOTALL)
        if not bash_match:
            print(f"ERROR [{task_name}]: Model response is missing <test_sh> tags.")
        bash_script = bash_match.group(1).strip() if bash_match else "# Error: No bash script authored"

        # Extract Python script.
        python_match = re.search(r"<test_state_py>(.*?)</test_state_py>", content, re.DOTALL)
        if not python_match:
            print(f"ERROR [{task_name}]: Model response is missing <test_state_py> tags.")
        python_script = python_match.group(1).strip() if python_match else "# Error: No python script authored"

        return bash_script, python_script

    except Exception as e:
        print(f"Error calling LLM for verifier authoring on {task_name}: {e}")
        # BUG FIX: the original emitted literal "\\n" characters here, so the
        # caller's startswith("#!/bin/bash\necho 'Error:") check never matched.
        return f"#!/bin/bash\necho 'Error: {e}'\nexit 1", f"# Error: {e}"


def inject_inferredbugs_verifier(dataset_dir: str, questions: List[str], model_name: str = "gpt-5-nano", max_workers: int = 30) -> None:
    """Author and write a verifier harness into each task directory.

    Tasks that already have both harness files are skipped (resumption),
    as are tasks whose LLM authoring failed.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    tasks_root = Path(dataset_dir)
    task_dirs = sorted((d for d in tasks_root.iterdir() if d.is_dir()), key=lambda x: x.name)

    print(f"Authoring verifier harnesses for {len(task_dirs)} tasks using {model_name} (workers={max_workers})...")

    def process_task(task_dir: Path, instruction: str) -> None:
        tests_dir = task_dir / "tests"
        tests_dir.mkdir(exist_ok=True)
        test_sh_path = tests_dir / "test.sh"
        test_py_path = tests_dir / "test_state.py"

        if test_sh_path.exists() and test_py_path.exists():
            print(f" - Verifier already exists for {task_dir.name}. Skipping.")
            return

        bash_code, python_code = author_verifier_harness(instruction, task_dir.name, model_name)

        # Detect every sentinel the author function can produce (the original
        # missed the "# Error: No bash script authored" case).
        if python_code.startswith("# Error") or bash_code.startswith("# Error") \
                or bash_code.startswith("#!/bin/bash\necho 'Error:"):
            print(f" - Skipping {task_dir.name}: LLM authoring failed.")
            return

        test_py_path.write_text(python_code)
        test_sh_path.write_text(bash_code)
        os.chmod(test_sh_path, 0o755)
        print(f" - Harness generated for {task_dir.name}")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_task, td, instr) for td, instr in zip(task_dirs, questions)]
        for f in as_completed(futures):
            f.result()


# ============================================================================
# file: data/self_instruct/generate_with_verifier.py (new file)
# ============================================================================
#!/usr/bin/env python3
"""
Generate verified Self-Instruct dataset by adding LLM-authored functional verifiers.
"""

import argparse
import sys
from pathlib import Path

# Add project root to sys.path so sibling packages resolve when run as a script.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Import from parent package
from data.commons import (
    upload_tasks_to_hf,
    download_hf_dataset,
)
from scripts.harbor import tasks_parquet_converter as tpc
from data.self_instruct.self_instruct_verifier import inject_self_instruct_verifier


def main() -> None:
    """Download Self-Instruct tasks, inject functional verifiers, upload."""
    parser = argparse.ArgumentParser(description="Generate Verified Self-Instruct dataset")
    parser.add_argument("--limit", type=int, default=None, help="Limit number of tasks")
    parser.add_argument("--skip_upload", action="store_true", help="Skip HF upload")
    parser.add_argument("--model", type=str, default="gpt-5-nano", help="Authoring model")
    args = parser.parse_args()

    source_repo = "DCAgent/selfinstruct-naive-sandboxes-2"
    target_repo = "DCAgent/selfinstruct-naive-sandboxes-2-verified"

    # 1. Download
    print(f"Step 1: Downloading source tasks from {source_repo}...")
    snapshot_dir = Path(download_hf_dataset(source_repo))

    # 2. Extract tasks (fixed workdir supports resumption, like inferredbugs).
    output_dir = PROJECT_ROOT / "data" / "self_instruct" / "workdir"
    output_dir.mkdir(parents=True, exist_ok=True)

    if not any(output_dir.iterdir()):
        print(f"Step 2: Extracting tasks to: {output_dir}")
        parquet_files = sorted(snapshot_dir.rglob("*.parquet"))
        if not parquet_files:
            raise FileNotFoundError(f"No parquet files found in {snapshot_dir}")
        tpc.from_parquet(parquet_path=str(parquet_files[0]), base=str(output_dir), on_exist="overwrite")
    else:
        print(f"Step 2: Reusing tasks in {output_dir}")

    # 3. Inject Verifiers
    print(f"Step 3: Authoring functional verifiers using {args.model}...")
    task_dirs = sorted(d for d in output_dir.iterdir() if d.is_dir())
    if args.limit:
        task_dirs = task_dirs[: args.limit]

    # BUG FIX: guard missing instruction.md (the original raised FileNotFoundError),
    # consistent with the inferredbugs pipeline.
    instructions = [
        (d / "instruction.md").read_text() if (d / "instruction.md").exists() else ""
        for d in task_dirs
    ]

    inject_self_instruct_verifier(str(output_dir), instructions, model_name=args.model)

    # 4. Upload
    if not args.skip_upload:
        print(f"Step 4: Uploading to {target_repo}...")
        upload_tasks_to_hf(str(output_dir), target_repo)
    else:
        print(f"Upload skipped. Tasks available in: {output_dir}")


if __name__ == "__main__":
    main()


# ============================================================================
# file: data/self_instruct/self_instruct_verifier.py (new file)
# ============================================================================
"""
Logic for authoring and injecting functional Python verifiers for Self-Instruct tasks.
"""

import os
import re
from pathlib import Path
from typing import List, Tuple

# NOTE(review): tag names reconstructed after corruption stripped them — TODO confirm.
AUTHORING_PROMPT_TEMPLATE = """
You are authoring a verification harness for a task completion evaluation. The harness consists of two files:

- `test.sh`: bash entrypoint — installs the Python packages imported by `test_state.py` (if `test_state.py` only uses stdlib modules like `re`, `os`, `pathlib`, no pip installs are needed), then runs `python3 -u /tests/test_state.py`.
- `test_state.py`: Python script — verifies the functional outcome of the task and writes a reward score.

The harness must verify whether the agent successfully completed the "Task Instruction" below by checking the state of the sandbox (files created, content of files, permissions, etc.).

================ CORE PRINCIPLES ================
1. Verify Outcomes, Not Just Code: Focus on the side effects. If the task asks to create a file, check if that file exists and has the correct content.
2. Deliverables: The instruction often lists specific "Deliverables". Your verifier MUST check for these files at their specified paths.
3. Exhaustive Constraint Matching & Comprehensive Testing: Identify every technical constraint (e.g., 'recursive', 'case-insensitive', 'human-readable'). Your Python script MUST independently verify each requirement by creating specific test scenarios (e.g., nested folders for recursion, varied cases for case-insensitivity). The final score must be 1.0 only if **all** checks pass.
4. Descriptive Logging: Print clear "CHECK PASSED" or "CHECK FAILED" messages for every specific requirement you verify.
5. Robustness: Handle whitespace, case sensitivity (if appropriate), and varied formatting in the agent's output.
6. The Contract: The Python script MUST write a scalar score (1.0 for success, 0.0 for failure) to the file `/logs/verifier/reward.txt`.
7. Environment: The `test.sh` bash script must install any dependencies required by your `test_state.py` Python script.
8. Path Handling: The agent will be working in a sandbox. Focus your checks on the file names and relative paths specifically mentioned in the "Task Instruction." Do not assume absolute paths (like /app) unless they are explicitly stated in the instruction.

================ REQUIRED OUTPUT FORMAT ================
Emit ONLY the two XML blocks below — no prose before, between, or after them.

<test_sh>
test.sh content here
</test_sh>

<test_state_py>
test_state.py content here
</test_state_py>

================ REFERENCE EXAMPLE ================

--- EXAMPLE INSTRUCTION ---
# Replace default with custom in /tmp .log files

Goal: Find all files in the /tmp directory with a .log extension and replace the word "default" with "custom" in each file, editing them in place.

Steps:

1. Create a script named fix_tmp_logs.sh that will:
   - Enumerate all .log files under /tmp (for example using find /tmp -type f -name "*.log" -print0).
   - For every file found, apply an in-place substitution that replaces the word default with custom, matching whole words only.
   - Print a short summary of how many files were updated.

2. Example implementation outline (you may adapt the exact syntax):
   - #!/usr/bin/env bash
     set -euo pipefail
     count=0
     while IFS= read -r -d '' file; do
       perl -0777 -pe 's/\\bdefault\\b/custom/g' -i "$file" || exit 1
       ((count++))
     done < <(find /tmp -type f -name "*.log" -print0)
     echo "$count file(s) updated."

3. Run the script and verify:
   - bash fix_tmp_logs.sh
   - Verify there are no remaining 'default' occurrences in /tmp/*.log:
     grep -R --word-regexp -n 'default' /tmp/*.log 2>/dev/null || true

Deliverables:
- A single executable script named fix_tmp_logs.sh that performs the task when run. Include executable permissions (chmod +x fix_tmp_logs.sh) as part of instructions. Optionally provide a short log of the changes.

--- EXAMPLE VERIFIER ---
<test_sh>
#!/bin/bash
# 1. Standard setup
apt-get update > /dev/null 2>&1
apt-get install -y python3 > /dev/null 2>&1
# 2. Run the functional judge
python3 -u /tests/test_state.py
</test_sh>

<test_state_py>
import os
import subprocess
from pathlib import Path

def verify():
    print("--- Starting Functional Verification ---")
    reward_file = Path("/logs/verifier/reward.txt")
    reward_file.parent.mkdir(parents=True, exist_ok=True)

    # 1. Locate the agent's deliverable
    script_name = "fix_tmp_logs.sh"
    # Robust search for the script (check both current dir and /app)
    candidate_paths = [Path(script_name), Path("/app") / script_name]
    script_path = next((p for p in candidate_paths if p.exists()), None)

    if not script_path:
        print(f"FAIL: {{script_name}} not found.")
        reward_file.write_text("0.0")
        return

    # 2. Setup a test case (Prepare dummy data)
    test_log = Path("/tmp/test_verifier_dummy.log")
    test_log.write_text("This is a default value. Also, another default here.")
    print(f"Created test log at {{test_log}}")

    # 3. Execute the agent's work
    try:
        os.chmod(script_path, 0o755)
        result = subprocess.run(["bash", str(script_path)], capture_output=True, text=True, timeout=30)
        print(f"STDOUT: {{result.stdout}}")
    except Exception as e:
        print(f"ERROR: Script failed to run: {{e}}")
        reward_file.write_text("0.0")
        return

    # 4. Validate the outcome (Side effects)
    content = test_log.read_text()
    if "custom" in content and "default" not in content:
        print("PASS: Substitution successful.")
        reward_file.write_text("1.0")
    else:
        print("FAIL: Substitution failed or 'default' remains.")
        reward_file.write_text("0.0")

    if test_log.exists(): test_log.unlink()

if __name__ == "__main__":
    verify()
</test_state_py>

---
### Task Instruction to Process:
{instruction}
"""


def author_verifier_harness(instruction: str, task_name: str, model_name: str = "gpt-5-nano") -> Tuple[str, str]:
    """Call the LLM once and return (test.sh content, test_state.py content).

    On any failure the returned scripts carry an "# Error" sentinel so callers
    can detect the failure and skip the task.
    """
    try:
        # Imported lazily so the module can be loaded without the SDK installed.
        from openai import OpenAI

        client = OpenAI()
        prompt = AUTHORING_PROMPT_TEMPLATE.format(instruction=instruction)

        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=1.0,
        )

        # BUG FIX: message.content may be None; guard before regex.
        content = response.choices[0].message.content or ""

        bash_match = re.search(r"<test_sh>(.*?)</test_sh>", content, re.DOTALL)
        if not bash_match:
            print(f"ERROR [{task_name}]: Missing <test_sh> tags")
        bash_script = bash_match.group(1).strip() if bash_match else "# Error: No bash script authored"

        python_match = re.search(r"<test_state_py>(.*?)</test_state_py>", content, re.DOTALL)
        if not python_match:
            print(f"ERROR [{task_name}]: Missing <test_state_py> tags")
        python_script = python_match.group(1).strip() if python_match else "# Error: No python script authored"

        return bash_script, python_script

    except Exception as e:
        print(f"Error calling LLM for verifier authoring on {task_name}: {e}")
        return "#!/bin/bash\nexit 1", f"# Error: {e}"


def inject_self_instruct_verifier(dataset_dir: str, questions: List[str], model_name: str = "gpt-5-nano", max_workers: int = 30) -> None:
    """Author and write a functional verifier harness into each task directory.

    Skips tasks that already have a full harness (resumption) and — BUG FIX:
    unlike the original, which wrote sentinel scripts to disk — skips tasks
    whose LLM authoring failed.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    tasks_root = Path(dataset_dir)
    task_dirs = sorted((d for d in tasks_root.iterdir() if d.is_dir()), key=lambda x: x.name)

    print(f"Authoring functional verifiers for {len(task_dirs)} tasks using {model_name}...")

    def process_task(task_dir: Path, instruction: str) -> None:
        tests_dir = task_dir / "tests"
        tests_dir.mkdir(exist_ok=True)
        test_sh_path = tests_dir / "test.sh"
        test_py_path = tests_dir / "test_state.py"

        # Check BOTH files (the original only checked test_state.py, so an
        # interrupted run could leave a task without test.sh forever).
        if test_sh_path.exists() and test_py_path.exists():
            print(f"[{task_dir.name}] Skipping - verifier already exists.")
            return

        bash_code, python_code = author_verifier_harness(instruction, task_dir.name, model_name)

        if python_code.startswith("# Error") or bash_code.startswith("# Error") \
                or bash_code == "#!/bin/bash\nexit 1":
            print(f"[{task_dir.name}] Skipping - LLM authoring failed.")
            return

        test_py_path.write_text(python_code)
        test_sh_path.write_text(bash_code)
        os.chmod(test_sh_path, 0o755)
        print(f"[{task_dir.name}] Verifier generated.")

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_task, td, instr) for td, instr in zip(task_dirs, questions)]
        for f in as_completed(futures):
            f.result()