96 changes: 96 additions & 0 deletions environments/community/solidity_audit_env/README.md
@@ -0,0 +1,96 @@
# Solidity Smart Contract Security Audit Environment

An Atropos RL environment that trains LLMs to detect security vulnerabilities in Solidity smart contracts.

## Overview

This environment presents Solidity code snippets to an LLM and asks it to:
1. Determine if the code is vulnerable
2. Identify the vulnerability category (reentrancy, access control, integer overflow, etc.)
3. Explain the vulnerability
4. Suggest a fix

## Dataset

Uses [darkknight25/Smart_Contract_Vulnerability_Dataset](https://huggingface.co/datasets/darkknight25/Smart_Contract_Vulnerability_Dataset) from HuggingFace:
- ~2,000 Solidity code snippets with labeled vulnerabilities
- Fields: `code_snippet`, `category`, `description`, `severity`, `vulnerable`
- MIT License
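For orientation, a preprocessed entry might look like the following (hypothetical values for illustration, not an actual dataset row):

```python
# Hypothetical example of a preprocessed dataset entry (illustrative only).
entry = {
    "code_snippet": (
        "function withdraw() public {\n"
        "    msg.sender.call{value: balances[msg.sender]}(\"\");\n"
        "    balances[msg.sender] = 0;\n"
        "}"
    ),
    "category": "reentrancy",
    "description": "External call happens before the balance is zeroed.",
    "severity": "high",
    "vulnerable": True,
}
```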

## Scoring

Multi-component reward function (score range 0.0–1.0):

| Component | Weight | Logic |
|---|---|---|
| Vulnerability detection | 0.25 | Binary match (vulnerable: true/false) |
| Category match | 0.35 | Fuzzy string similarity (SequenceMatcher) |
| Description quality | 0.25 | Keyword Jaccard similarity with ground truth |
| Format compliance | 0.15 | Valid YAML, all fields present, boxed format |

Additional behaviors:
- **Length penalty**: When all scores >= 0.9, shorter responses are rewarded
- **No learning signal**: Returns None when all scores are identical (standard Atropos pattern)
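The weighting above can be sketched as follows (a simplified illustration; the real logic lives in `scoring.py`, and the helper names here are assumptions, not the module's actual API):

```python
from difflib import SequenceMatcher

# Component weights from the table above.
WEIGHTS = {"detection": 0.25, "category": 0.35, "description": 0.25, "format": 0.15}


def category_similarity(pred: str, truth: str) -> float:
    # Fuzzy string similarity, as used for the category component.
    return SequenceMatcher(None, pred.lower(), truth.lower()).ratio()


def keyword_jaccard(pred: str, truth: str) -> float:
    # Jaccard overlap of whitespace-tokenized keywords.
    a, b = set(pred.lower().split()), set(truth.lower().split())
    return len(a & b) / len(a | b) if a | b else 0.0


def combine(detection: float, category: float, description: float, fmt: float) -> float:
    # Weighted sum of the four component scores; weights sum to 1.0.
    return (WEIGHTS["detection"] * detection
            + WEIGHTS["category"] * category
            + WEIGHTS["description"] * description
            + WEIGHTS["format"] * fmt)
```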

## Expected Output Format

```yaml
\boxed{
vulnerable: true
category: "reentrancy"
description: "The withdraw function calls an external address before updating the balance state variable"
fix: "Move the state update before the external call (checks-effects-interactions pattern)"
}
```
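One way such a response could be parsed (a sketch under the assumption that the payload inside `\boxed{...}` is plain YAML; the environment's actual parser may differ):

```python
import re

import yaml  # pyyaml, listed in requirements.txt


def parse_boxed_yaml(response: str):
    """Extract the \\boxed{...} payload and parse it as YAML; None on failure."""
    match = re.search(r"\\boxed\{(.*)\}", response, re.DOTALL)
    if not match:
        return None
    try:
        parsed = yaml.safe_load(match.group(1))
    except yaml.YAMLError:
        return None
    # Only a mapping of audit fields counts as a well-formed answer.
    return parsed if isinstance(parsed, dict) else None
```

Returning `None` instead of raising lets a malformed response simply score zero on the format-compliance component.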

## Setup

```bash
pip install -r requirements.txt
```

## Running

### Process Mode (data generation, no training server needed)

```bash
cd environments/community/solidity_audit_env

python solidity_audit_env.py process \
--env.data_path_to_save_groups audit_output.jsonl \
--openai.base_url https://openrouter.ai/api/v1 \
--openai.api_key $OPENROUTER_API_KEY \
--openai.model_name qwen/qwen3-8b
```

### Training Mode (requires Atropos training server)

```bash
python solidity_audit_env.py run
```

## Testing

```bash
cd environments/community/solidity_audit_env
python -m pytest test_scoring.py -v
```

## File Structure

```
solidity_audit_env/
├── README.md # This file
├── solidity_audit_env.py # Main environment (BaseEnv subclass)
├── scoring.py # Reward function helpers
├── dataset_loader.py # HuggingFace dataset loading & preprocessing
├── test_scoring.py # Unit tests for scoring logic
└── requirements.txt # Dependencies
```

## WandB Metrics

- `train/avg_reward` - Average reward across training batches
- `train/vuln_detection_accuracy` - Binary vulnerability detection accuracy
- `train/category_accuracy` - Category matching score
- `eval/avg_reward` - Average reward on evaluation set
99 changes: 99 additions & 0 deletions environments/community/solidity_audit_env/dataset_loader.py
@@ -0,0 +1,99 @@
"""
Dataset loader for the Solidity Smart Contract Vulnerability Dataset.

Loads and preprocesses the darkknight25/Smart_Contract_Vulnerability_Dataset
from HuggingFace for use in the Solidity audit RL environment.

The upstream JSONL file contains some malformed rows, so we download the raw
file and parse line-by-line, skipping invalid entries.
"""

import json
import random
from typing import Dict, List, Tuple

from huggingface_hub import hf_hub_download

DATASET_REPO = "darkknight25/Smart_Contract_Vulnerability_Dataset"
DATASET_FILENAME = "smartcontract_vuleablities _dataset.jsonl"  # typo and stray space are in the upstream filename; do not "fix"

REQUIRED_KEYS = {"code_snippet", "category"}


def normalize_category(category: str) -> str:
"""Normalize vulnerability category to lowercase with underscores."""
return category.strip().lower().replace(" ", "_").replace("-", "_")


def preprocess_entry(entry: Dict) -> Dict:
"""Normalize a single dataset entry to a consistent format.

Returns:
Dict with keys: code_snippet, category, description, severity, vulnerable
"""
    # Coerce string booleans explicitly: bool("false") would otherwise be True.
    vulnerable = entry.get("vulnerable", True)
    if isinstance(vulnerable, str):
        vulnerable = vulnerable.strip().lower() in ("true", "yes", "1")
    return {
        "code_snippet": entry["code_snippet"].strip(),
        "category": normalize_category(entry["category"]),
        "description": str(entry.get("description", "") or "").strip(),
        "severity": str(entry.get("severity", "unknown") or "unknown").strip().lower(),
        "vulnerable": bool(vulnerable),
    }


def _load_jsonl_robust(filepath: str) -> List[Dict]:
"""Load a JSONL file, skipping malformed rows."""
entries = []
skipped = 0
with open(filepath, "r", encoding="utf-8") as f:
for line_num, line in enumerate(f, 1):
line = line.strip()
if not line:
continue
try:
obj = json.loads(line)
if isinstance(obj, dict) and REQUIRED_KEYS.issubset(obj.keys()):
entries.append(obj)
else:
skipped += 1
except json.JSONDecodeError:
skipped += 1
if skipped:
print(f"Warning: skipped {skipped} malformed rows in dataset")
return entries


def load_vulnerability_dataset(
seed: int = 42,
test_ratio: float = 0.2,
) -> Tuple[List[Dict], List[Dict]]:
"""Load and split the vulnerability dataset into train and test sets.

Downloads the raw JSONL from HuggingFace Hub and parses line-by-line
to handle malformed rows in the upstream dataset.

Args:
seed: Random seed for reproducible splitting.
test_ratio: Fraction of data to use for testing.

Returns:
Tuple of (train_data, test_data) where each is a list of preprocessed dicts.
"""
filepath = hf_hub_download(
repo_id=DATASET_REPO,
filename=DATASET_FILENAME,
repo_type="dataset",
)

raw_entries = _load_jsonl_robust(filepath)
print(f"Loaded {len(raw_entries)} valid entries from dataset")

data = [preprocess_entry(e) for e in raw_entries]

random.seed(seed)
random.shuffle(data)

split_idx = int(len(data) * (1 - test_ratio))
train_data = data[:split_idx]
test_data = data[split_idx:]

return train_data, test_data
3 changes: 3 additions & 0 deletions environments/community/solidity_audit_env/requirements.txt
@@ -0,0 +1,3 @@
datasets
huggingface_hub
pyyaml