diff --git a/.github/workflows/fork-ci.yml b/.github/workflows/fork-ci.yml new file mode 100644 index 0000000..ccc5bc8 --- /dev/null +++ b/.github/workflows/fork-ci.yml @@ -0,0 +1,138 @@ +# Fork-level CI for HaraldeRoessler/moltrust-api. +# +# Intentionally lightweight — exercises the things that catch the +# common breakage modes without needing a running API or database: +# +# syntax — `python -m compileall` on every source dir. +# import-smoke — actually try `from app.main import app` with all +# required env vars set to placeholder values. This +# is the only job that catches startup-time +# RuntimeError raises (e.g. NONCE_SECRET unset). +# pytest-collect — `pytest --collect-only` over the in-repo tests. +# Catches ImportError / SyntaxError in test modules +# without needing the API + Postgres stack online. +# ruff — informational lint, never blocks merge. +# bandit — informational SAST, never blocks merge. +# +# Triggers on every push and PR. Separate file name (`fork-ci.yml`) +# from PR #14's proposed `ci.yml` so the two can co-exist if PR #14 +# lands upstream later. + +name: Fork CI + +on: + push: + pull_request: + workflow_dispatch: + +permissions: + contents: read + security-events: write + +concurrency: + group: fork-ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + syntax: + name: syntax (compileall) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: byte-compile every source dir + run: | + python -m compileall -q app + for d in scripts agents agent operator moltbook monitor; do + if [ -d "$d" ]; then python -m compileall -q "$d"; fi + done + # Also top-level scripts that aren't in a directory. 
+ for f in seed_ecosystem.py test_protocol_compliance.py test_sandbox.py; do if [ -f "$f" ]; then python -m compileall -q "$f"; fi; done + + import-smoke: + name: import smoke test (with required env vars) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: pip + cache-dependency-path: requirements.txt + - name: install requirements + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: import app.main with placeholder env + # The whole point of this job: catch any startup-time + # RuntimeError. The security hardening pass added fail-fast + # checks for NONCE_SECRET, MOLTRUST_API_KEYS, MOLTSTACK_DB_PW + # — if one of those is missing, the import explodes here + # rather than in production. + env: + MOLTRUST_API_KEYS: 'mt_ci_placeholder_key_does_not_authenticate' + NONCE_SECRET: 'ci-placeholder-nonce-secret' + MOLTSTACK_DB_PW: 'ci-placeholder-db-pw' + MOLTRUST_ADMIN_USERS: 'ci-admin:admin:$2b$12$ciplaceholderhashthatwillnevermatchanypassword.ciplaceholderhash' + MOLTRUST_ENV: 'ci' + run: | + python -c "from app.main import app; print('app.main imported OK, FastAPI app:', type(app).__name__)" + + pytest-collect: + name: pytest --collect-only + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: pip + cache-dependency-path: requirements.txt + - name: install requirements + pytest + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest pytest-asyncio + - name: collect tests + # Collection imports test modules without running them. Catches + # ImportError / SyntaxError / fixture-parse errors without + # needing the API stack online. 
+ env: + MOLTRUST_API_KEYS: 'mt_ci_placeholder_key' + NONCE_SECRET: 'ci-placeholder' + MOLTSTACK_DB_PW: 'ci-placeholder' + run: | + set -o pipefail; pytest --collect-only -q tests/ test_*.py 2>&1 | tail -40 + + ruff: + name: ruff (informational) + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - run: pip install ruff + - name: ruff check + # Informational only — don't block merge on lint noise. Surfaces + # in the workflow log so we can clean up over time. + run: ruff check app/ agents/ scripts/ monitor/ --output-format=concise || true + + bandit: + name: bandit SAST (informational) + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - run: pip install bandit + - name: bandit scan + # -ll = report MEDIUM+ severity only (filters out the noise). + # Findings here are a useful belt-and-suspenders signal — most + # are already either fixed in this PR or dismissed in CodeQL. + run: bandit -r app/ agents/ scripts/ monitor/ -ll || true diff --git a/Dockerfile b/Dockerfile index 76a52ab..434fb5e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,12 @@ RUN pip install --no-cache-dir -r requirements.txt COPY . . +# Drop root — uvicorn runs as an unprivileged user, limiting blast +# radius if the application is compromised. 
+RUN useradd --create-home --shell /bin/bash --uid 1001 appuser \ + && chown -R appuser:appuser /app +USER appuser + EXPOSE 8000 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/agents/herald_v3.py b/agents/herald_v3.py index 553fdc2..c5b4c78 100644 --- a/agents/herald_v3.py +++ b/agents/herald_v3.py @@ -349,7 +349,7 @@ def generate_awareness_tweet(state: dict) -> str | None: available = TOPIC_SEEDS recent_seeds = [] - seed = random.choice(available) + seed = random.choice(available) # noqa: S311 — non-security topic seed selection recent_seeds.append(seed) state["recent_seeds"] = recent_seeds[-10:] diff --git a/agents/moltbook_poster.py b/agents/moltbook_poster.py index 35009e4..03c826e 100644 --- a/agents/moltbook_poster.py +++ b/agents/moltbook_poster.py @@ -108,7 +108,7 @@ def generate_post(topic, previous_titles): log.error("No ANTHROPIC_API_KEY available") return None - submolt = random.choice(SUBMOLTS) + submolt = random.choice(SUBMOLTS) # noqa: S311 — non-security content selection prev_list = "\n".join(f"- {t}" for t in previous_titles[-15:]) if previous_titles else "None yet" user_msg = ( @@ -260,7 +260,10 @@ def save_state(state): def post_hash(title): - return hashlib.md5(title.encode()).hexdigest()[:12] + # SHA-256 truncated to 12 hex chars. MD5 is broken (collision attacks); + # while this hash is non-security-critical (dedup only), avoiding MD5 + # silences static-analysis noise and pre-empts future foot-guns. 
+ return hashlib.sha256(title.encode()).hexdigest()[:12] # ── Lobster Math Solver ─────────────────────────────────────────────────────── @@ -525,7 +528,7 @@ def pick_post(state): previous_titles = state.get("posted_titles", []) # Pick a random topic seed - topic = random.choice(TOPIC_SEEDS) + topic = random.choice(TOPIC_SEEDS) # noqa: S311 — non-security content selection log.info(f"Topic seed: {topic}") # Try Claude-generated post @@ -543,7 +546,7 @@ def pick_post(state): state["posted_hashes"] = [] available = FALLBACK_POOL - return random.choice(available) + return random.choice(available) # noqa: S311 — non-security content selection def main(): diff --git a/agents/news_scout.py b/agents/news_scout.py index fc6e510..b7889f8 100644 --- a/agents/news_scout.py +++ b/agents/news_scout.py @@ -6,7 +6,13 @@ import sys import time import hashlib -import xml.etree.ElementTree as ET +# defusedxml protects against XXE / Billion Laughs / DTD-bomb attacks on +# untrusted RSS feeds. Falls back to stdlib only if defusedxml isn't +# installed yet — the requirements.txt bump in this commit declares it. +try: + import defusedxml.ElementTree as ET # type: ignore[import] +except ImportError: + import xml.etree.ElementTree as ET # type: ignore[no-redef] from datetime import datetime, timezone, timedelta from pathlib import Path from urllib.parse import quote_plus @@ -106,8 +112,8 @@ def save_heartbeat(status: str, detail: str = ""): def url_key(url: str) -> str: - """Normalize URL for dedup.""" - return hashlib.md5(url.strip().lower().encode()).hexdigest() + """Normalize URL for dedup. 
SHA-256 (MD5 is broken).""" + return hashlib.sha256(url.strip().lower().encode()).hexdigest() def parse_date(date_str: str) -> datetime | None: diff --git a/agents/poll_payments.py b/agents/poll_payments.py index ca3c61a..f4f4103 100644 --- a/agents/poll_payments.py +++ b/agents/poll_payments.py @@ -43,8 +43,10 @@ def fetch_recent_transfers() -> list: url = f"{BASESCAN_URL}?{params}" try: + if not url.startswith(("http://", "https://")): + raise ValueError("refusing non-HTTP(S) URL") req = Request(url, headers={"User-Agent": "MolTrust/1.0"}) - with urlopen(req, timeout=15) as resp: + with urlopen(req, timeout=15) as resp: # noqa: S310 — scheme validated above data = json.loads(resp.read()) except Exception as e: log.error("Basescan API error: %s", e) diff --git a/agents/retention_cleanup.py b/agents/retention_cleanup.py index 51ed024..95c439e 100644 --- a/agents/retention_cleanup.py +++ b/agents/retention_cleanup.py @@ -16,9 +16,11 @@ def send_telegram(msg): return try: data = json.dumps({"chat_id": TG_CHAT, "text": msg}).encode() - req = Request(f"https://api.telegram.org/bot{TG_TOKEN}/sendMessage", - data=data, headers={"Content-Type": "application/json"}) - urlopen(req, timeout=10) + url = f"https://api.telegram.org/bot{TG_TOKEN}/sendMessage" + if not url.startswith(("http://", "https://")): + return + req = Request(url, data=data, headers={"Content-Type": "application/json"}) + urlopen(req, timeout=10) # noqa: S310 — scheme validated above except Exception: pass diff --git a/agents/x_thread_followup.py b/agents/x_thread_followup.py index 93c92cd..57fe063 100755 --- a/agents/x_thread_followup.py +++ b/agents/x_thread_followup.py @@ -21,7 +21,7 @@ #AIAgents #W3C #DID #Base #OpenSource""" -resp = requests.post("https://api.twitter.com/2/tweets", json={"text": text}, auth=auth) +resp = requests.post("https://api.twitter.com/2/tweets", json={"text": text}, auth=auth, timeout=15) data = resp.json() if resp.status_code in (200, 201): print(f"Tweet posted: 
{data['data']['id']}") diff --git a/agents/x_wallet_binding.py b/agents/x_wallet_binding.py index 96be606..2edddca 100644 --- a/agents/x_wallet_binding.py +++ b/agents/x_wallet_binding.py @@ -24,7 +24,7 @@ #AIAgents #x402 #Base #A2A""" -resp = requests.post("https://api.twitter.com/2/tweets", json={"text": text}, auth=auth) +resp = requests.post("https://api.twitter.com/2/tweets", json={"text": text}, auth=auth, timeout=15) data = resp.json() if resp.status_code in (200, 201): print(f"Tweet posted: {data['data']['id']}") diff --git a/app/admin_auth.py b/app/admin_auth.py index 80b464d..0e4c7fb 100644 --- a/app/admin_auth.py +++ b/app/admin_auth.py @@ -1,22 +1,37 @@ """MolTrust Admin Dashboard — Auth Module""" +import os import secrets from datetime import datetime, timezone, timedelta import bcrypt -ADMIN_USERS = { - "lars": { - "hash": "$2b$12$rxHaimEF4Ok1bXO4jybvQOx8cSmwhM/JRGWfTtlZ0OvvoFftTg6NC", - "role": "superadmin", - }, - "harald": { - "hash": "$2b$12$c5GzSAMWozukKvNWmIiZ8OnP7I9i/7Ho0kKx5hVNGGUbJzWKXvZgC", - "role": "admin", - }, - "bernd": { - "hash": "$2b$12$l3IuGfAveTEmC06YS7CNb.C3yGU5rkRvRJYnfpD6C4OtyBcqlMQBK", - "role": "admin", - }, -} + +def _load_admin_users() -> dict: + """ + Load admin users from env vars rather than baking bcrypt hashes into + source. The expected format is a comma-separated triplet list: + + MOLTRUST_ADMIN_USERS="lars:superadmin:$2b$12$...,harald:admin:$2b$12$...,bernd:admin:$2b$12$..." + + Empty / missing env var → no admins registered (login will refuse + every request, fail-closed). Hashes that don't parse as bcrypt are + skipped with a startup warning. 
+ """ + import logging # function-local: only needed for the startup warning below + raw = os.environ.get("MOLTRUST_ADMIN_USERS", "").strip() + users: dict[str, dict] = {} + for idx, entry in enumerate(raw.split(",") if raw else [], 1): + parts = [p.strip() for p in entry.split(":", 2)] + if len(parts) == 3 and parts[0] and parts[1] and parts[2].startswith("$2"): + users[parts[0]] = {"hash": parts[2], "role": parts[1]} + else: + # Warn by position only — the entry text may contain a credential hash. + logging.getLogger(__name__).warning( + "MOLTRUST_ADMIN_USERS: skipping malformed entry #%d", idx + ) + return users + + +ADMIN_USERS: dict[str, dict] = _load_admin_users() # In-memory sessions (sufficient for 3 users) SESSIONS: dict[str, dict] = {} diff --git a/app/billing.py b/app/billing.py index 286f1b0..0e86eb8 100644 --- a/app/billing.py +++ b/app/billing.py @@ -13,6 +13,7 @@ """ import os +import secrets import logging import re from datetime import datetime, timezone @@ -374,7 +375,7 @@ async def list_referrals(request: Request): """ admin_key = request.headers.get("x-admin-key", "") expected = os.environ.get("ADMIN_KEY", "") - if not expected or admin_key != expected: + if not expected or not admin_key or not secrets.compare_digest(admin_key.encode(), expected.encode()): raise HTTPException(401, "Admin key required") from app.main import db_pool diff --git a/app/crypto/kms_signer.py b/app/crypto/kms_signer.py index 7bd1251..0d0790d 100644 --- a/app/crypto/kms_signer.py +++ b/app/crypto/kms_signer.py @@ -13,13 +13,24 @@ def get_decrypted_signing_key_hex() -> str: """ Return the Ed25519 private key as a hex string (64 chars / 32 bytes). Decrypted via AWS KMS with 5-minute cache. - Falls back to plaintext env var or key file during migration. + Falls back to plaintext env var or key file during migration — + these fallbacks are DISABLED when MOLTRUST_ENV=production so that + an accidentally-set DID_PRIVATE_KEY_HEX env var or stray local + keyfile cannot bypass KMS in a production deploy. 
""" global _cached_key, _cache_expiry - # Fallback 1: plaintext env var (migration period) + is_production = os.environ.get('MOLTRUST_ENV', '').lower() == 'production' + + # Fallback 1: plaintext env var (migration / dev only) encrypted = os.environ.get('DID_PRIVATE_KEY_ENCRYPTED') if not encrypted: + if is_production: + raise ValueError( + 'KMS-encrypted signing key required in production ' + '(MOLTRUST_ENV=production); plaintext fallbacks disabled. ' + 'Set DID_PRIVATE_KEY_ENCRYPTED or unset MOLTRUST_ENV.' + ) hex_key = os.environ.get('DID_PRIVATE_KEY_HEX', '') if hex_key: return hex_key diff --git a/app/ipfs_publisher.py b/app/ipfs_publisher.py index 9207480..141ae78 100644 --- a/app/ipfs_publisher.py +++ b/app/ipfs_publisher.py @@ -44,6 +44,8 @@ def publish_to_ipfs(vc_json: dict, name: str = None) -> str | None: } }).encode() + if not PINATA_API_URL.startswith(("http://", "https://")): + raise ValueError("PINATA_API_URL must use http(s)://") req = urllib.request.Request( PINATA_API_URL, data=payload, @@ -53,7 +55,7 @@ def publish_to_ipfs(vc_json: dict, name: str = None) -> str | None: } ) - with urllib.request.urlopen(req, timeout=15) as r: + with urllib.request.urlopen(req, timeout=15) as r: # noqa: S310 — scheme validated above result = json.loads(r.read()) cid = result.get("IpfsHash") if cid: diff --git a/app/main.py b/app/main.py index 3890ca4..1265b91 100644 --- a/app/main.py +++ b/app/main.py @@ -68,13 +68,28 @@ def _ratelimit_key(request) -> str: @app.get("/docs", include_in_schema=False) async def custom_swagger_ui(): + # CSP limits the blast radius of any future injection into this HTML. + # Allowlist mirrors what the page actually loads (Swagger UI CDN + # + favicon host). 'unsafe-inline' is required by the existing + # theme-toggle script and inline styles; tightening further would + # need extracting those into a hashed external file. 
+ _csp = ( + "default-src 'self'; " + "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; " + "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; " + "img-src 'self' data: https://moltrust.ch; " + "font-src 'self' data:; " + "connect-src 'self'; " + "frame-ancestors 'none'; " + "base-uri 'self'" + ) return _HTMLResp("""