# feat: 记忆管理 benchmark 体系 (MemoryEvalRunner + FactRetentionGrader) #11
# Workflow file for this run

name: CI

# Triggers: every push/PR to main, plus manual dispatch.
# The `run_eval` input gates the (costly, LLM-backed) eval job below.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:
    inputs:
      run_eval:
        description: "Run full eval suite"
        type: boolean
        default: false
jobs:
  # Fast correctness gate: lint, format, types, unit tests, and a dry-run
  # validation of the eval case files — no LLM calls, runs on every push/PR.
  check:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"
      - name: Set up Python ${{ matrix.python-version }}
        run: uv python install ${{ matrix.python-version }}
      - name: Install dependencies
        run: uv sync --dev
      - name: Lint (ruff check)
        run: uv run ruff check .
      - name: Format check (ruff format)
        run: uv run ruff format --check .
      - name: Type check (mypy)
        # Advisory only until the codebase is fully typed — failures don't block CI.
        continue-on-error: true
        run: uv run mypy src/omni_agent
      - name: Unit tests
        # --cov-report=html produces htmlcov/ for the artifact upload below;
        # term-missing keeps the inline summary in the job log.
        run: uv run pytest --ignore=tests/integration --ignore=tests/api -v --cov=src/omni_agent --cov-report=term-missing --cov-report=html
      - name: Eval dry-run (validate eval cases)
        run: uv run python -m omni_agent.eval evals/ --dry-run
      - name: Upload coverage report
        # Upload once (newest matrix entry) to avoid duplicate artifacts.
        if: matrix.python-version == '3.12'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: htmlcov/
eval:
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
- name: Set up Python
run: uv python install 3.12
- name: Install dependencies
run: uv sync --dev
- name: Run eval suite
env:
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_BASE: ${{ secrets.LLM_API_BASE }}
LLM_MODEL: ${{ secrets.LLM_MODEL }}
run: uv run python -m omni_agent.eval evals/ --output eval_results
- name: Upload eval report
uses: actions/upload-artifact@v4
with:
name: eval-report
path: eval_results/