Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# CI workflow: runs the pytest suite on pushes to main and feat/** branches,
# and on pull requests that target main.
name: Tests
on:
push:
branches: [main, 'feat/**']
pull_request:
branches: [main]
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
# One job is run per interpreter version listed here.
python-version: ['3.10', '3.11', '3.12']
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
# Editable install of this project with its "dev" extra.
# NOTE(review): presumably the extra provides pytest and pytest-cov, which
# the next step needs -- confirm against the project metadata.
run: pip install -e ".[dev]"
- name: Run pytest
# Verbose run with short tracebacks, plus a terminal coverage report for the
# aiteen package that lists the uncovered lines.
run: pytest tests/ -v --tb=short --cov=aiteen --cov-report=term-missing
32 changes: 32 additions & 0 deletions .github/workflows/translate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Workflow: when a pull request touches English locale JSON, run the aiteen
# translation pipeline and commit the result back to the PR branch.
name: Auto-translate missing i18n strings
on:
pull_request:
# Only PRs that change English source strings trigger this workflow.
paths:
- '**/locales/en/**/*.json'
- '**/locales/en.json'
jobs:
translate:
runs-on: ubuntu-latest
# Write access is requested so the final step can push a commit.
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@v4
with:
# Check out the PR's head branch by name so the auto-commit lands on it.
# NOTE(review): for PRs opened from forks this branch may not exist in
# this repository -- confirm whether fork PRs are expected here.
ref: ${{ github.head_ref }}
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install aiteen
run: pip install .
- name: Run translation pipeline
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
# NOTE(review): the only trigger declared above is pull_request and no
# inputs are defined, so this presumably always falls back to
# 'public/locales' -- confirm.
LOCALES_DIR: ${{ inputs.locales_dir || 'public/locales' }}
run: |
aiteen run-all --locales-dir "$LOCALES_DIR" --source-lang en
- name: Commit translations
uses: stefanzweifel/git-auto-commit-action@v5
with:
commit_message: 'chore(i18n): auto-translate missing strings'
# Passed through to `git commit`; --no-verify skips local commit hooks.
commit_options: '--no-verify'
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,11 @@ datasets
locales
locale_comparison
.direnv
__pycache__/
*.py[cod]
*.egg-info/
dist/
build/
.pytest_cache/
.coverage
*.egg
3 changes: 3 additions & 0 deletions aiteen/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Aiteen v2 — robust i18n translation pipeline for Tari projects."""

__version__ = "2.0.0"
121 changes: 121 additions & 0 deletions aiteen/audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Audit locale files for missing translations.

Handles arbitrarily nested JSON. Compares each non-source locale against the
source locale (default: en). A key is considered "missing" if it's absent in
the target OR if its value equals the source value (i.e., untranslated).
"""
from __future__ import annotations

import json
from pathlib import Path
from typing import Any


def load_json(path: Path) -> dict:
    """Load and parse a UTF-8 encoded JSON file.

    The file is decoded with ``utf-8-sig`` so that a leading byte-order mark
    (often added by Windows editors) is skipped rather than reported as
    malformed JSON. Files without a BOM decode exactly as plain UTF-8.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The parsed document, i.e. whatever ``json.load`` produces for the file.

    Raises:
        ValueError: If the content is not valid JSON or cannot be decoded as
            UTF-8. The message names the offending path.
        OSError: If the file cannot be opened or read.
    """
    try:
        with path.open("r", encoding="utf-8-sig") as fh:
            return json.load(fh)
    except json.JSONDecodeError as e:
        raise ValueError(f"Malformed JSON in {path}: {e}") from e
    except UnicodeDecodeError as e:
        # Without this the decode failure would surface with no hint of which
        # locale file is broken.
        raise ValueError(f"Invalid UTF-8 in {path}: {e}") from e
    except OSError as e:
        raise OSError(f"Cannot read {path}: {e}") from e


def flatten(data: Any, parent: str = "") -> dict[str, Any]:
    """Collapse a nested mapping into a single level keyed by dotted paths.

    Only dicts are descended into. Every other value, lists included, is kept
    untouched as a leaf. A non-dict ``data`` yields ``{parent: data}`` when a
    parent path is given and an empty dict otherwise.
    """
    if not isinstance(data, dict):
        return {parent: data} if parent else {}

    flat: dict[str, Any] = {}
    for name, item in data.items():
        dotted = name if not parent else f"{parent}.{name}"
        if not isinstance(item, dict):
            flat[dotted] = item
            continue
        children = flatten(item, dotted)
        if children:
            flat.update(children)
        else:
            # Nothing came back from the sub-dict: keep an empty-dict leaf so
            # the branch is still present when the structure is rebuilt.
            flat[dotted] = {}
    return flat


def unflatten(flat: dict[str, Any]) -> dict[str, Any]:
    """Rebuild a nested dict from dotted-key paths (the reverse of flatten()).

    Each key is split on ``.``; all segments but the last name intermediate
    dicts, and the last segment receives the value. An intermediate entry that
    is not already a dict is replaced by a fresh one.
    """
    tree: dict[str, Any] = {}
    for dotted, value in flat.items():
        *branches, leaf = dotted.split(".")
        node = tree
        for name in branches:
            child = node.get(name)
            if not isinstance(child, dict):
                child = node[name] = {}
            node = child
        node[leaf] = value
    return tree


def list_namespaces(locale_dir: Path) -> list[str]:
    """List the ``*.json`` entries of a locale directory by stem, sorted.

    Returns an empty list when ``locale_dir`` is not a directory.
    """
    if locale_dir.is_dir():
        stems = [entry.stem for entry in locale_dir.glob("*.json")]
        stems.sort()
        return stems
    return []


def find_missing(
    source_dir: Path, target_dir: Path
) -> dict[str, dict[str, str]]:
    """Report the source strings that a target locale has not translated.

    Every namespace file in ``source_dir`` is compared with the file of the
    same name in ``target_dir``. Only string-valued source entries are
    considered. An entry is reported when the target lacks the key, or when
    the target value equals a non-blank source value. If the target file does
    not exist, all of the namespace's string entries are reported.

    Returns:
        ``{namespace: {dotted_key: source_value}}``; namespaces with nothing
        to report are omitted.

    Raises:
        FileNotFoundError: If ``source_dir`` is not a directory.
    """
    if not source_dir.is_dir():
        raise FileNotFoundError(f"Source locale dir not found: {source_dir}")

    report: dict[str, dict[str, str]] = {}
    for namespace in list_namespaces(source_dir):
        filename = f"{namespace}.json"
        source_strings = {
            key: text
            for key, text in flatten(load_json(source_dir / filename)).items()
            if isinstance(text, str)
        }

        target_file = target_dir / filename
        if target_file.is_file():
            translated = flatten(load_json(target_file))
            untranslated = {
                key: text
                for key, text in source_strings.items()
                # Absent keys always count; identical values count only when
                # the source text is not blank.
                if key not in translated
                or (translated[key] == text and text.strip())
            }
        else:
            # No target file at all: the whole namespace is untranslated.
            untranslated = source_strings

        if untranslated:
            report[namespace] = untranslated
    return report


def audit_all(
    locales_dir: Path, source_locale: str, target_locales: list[str]
) -> dict[str, dict[str, dict[str, str]]]:
    """Audit each target locale against the source locale.

    Both the source and the target directories are resolved as children of
    ``locales_dir``.

    Returns:
        ``{locale: {namespace: {key: source_value}}}`` with one entry per
        requested target locale.
    """
    reference_dir = locales_dir / source_locale
    return {
        locale: find_missing(reference_dir, locales_dir / locale)
        for locale in target_locales
    }
Loading