Leaderboard: update vLLM-SR to v350 metrics (#131), now #1 (#133) #9
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Sync leaderboard data to website

# Regenerates the derived data the RouteWorks website consumes
# (routerMetrics/leaderboard.json, category_scores.json, flip_labels/*) from the
# leaderboard manifest + evaluated prediction files, and opens a PR on
# RouteWorks/routeworks.github.io for review. The website's own deploy workflow
# publishes once that PR is merged.
#
# Requires a repository secret WEBSITE_SYNC_TOKEN: a fine-grained PAT (or GitHub
# App installation token) with Contents:write + Pull requests:write on
# RouteWorks/routeworks.github.io.

on:
  push:
    branches: [main]
    # Only re-sync when an input of the site-data build changes. README.md is
    # listed because the headline leaderboard metrics are sourced from it.
    paths:
      - "README.md"
      - "leaderboard_manifest.yaml"
      - "router_inference/predictions/**"
      - "scripts/website/build_site_data.py"
  # Allow a manual re-sync from the Actions tab.
  workflow_dispatch: {}

# The default GITHUB_TOKEN only needs to read this repository; every write goes
# to the website repository through WEBSITE_SYNC_TOKEN.
permissions:
  contents: read

# Every run pushes to the same `routerarena-sync` branch, so runs must not
# overlap: an older run finishing last would overwrite newer data. A run that is
# already in progress is allowed to finish; newer runs queue behind it.
concurrency:
  group: website-sync
  cancel-in-progress: false

jobs:
  sync:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout RouterArena
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install uv
        run: pip install uv

      - name: Prepare dataset
        env:
          # Same directory the build step below is pointed at.
          ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
        run: uv run python scripts/process_datasets/prep_datasets.py

      # Checked out into ./website so the build step can write into its
      # src/data and the PR step can commit from that working tree.
      - name: Checkout website repo
        uses: actions/checkout@v4
        with:
          repository: RouteWorks/routeworks.github.io
          token: ${{ secrets.WEBSITE_SYNC_TOKEN }}
          path: website

      - name: Build website data (merge in place)
        # Runs against the website's existing src/data so externally pre-computed
        # baselines are preserved; only routers in the manifest are updated.
        env:
          ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
        run: uv run python scripts/website/build_site_data.py --out website/src/data

      # Commits whatever changed under ./website to the fixed `routerarena-sync`
      # branch and opens (or updates) a PR on the website repository.
      - name: Open PR on website repo
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.WEBSITE_SYNC_TOKEN }}
          path: website
          branch: routerarena-sync
          commit-message: "Sync leaderboard data from RouterArena@${{ github.sha }}"
          title: "Sync leaderboard data from RouterArena"
          # Blank lines separate the paragraph, the list and the closing line so
          # the closing sentence is not rendered as part of the last bullet.
          body: |
            Automated update of `src/data` from RouterArena
            (commit ${{ github.sha }}).

            - `routerMetrics/leaderboard.json` — headline metrics (sourced from README)
            - `routerMetrics/category_scores.json` — per-difficulty breakdown
            - `flip_labels/*` — per-query robustness flips

            Generated by `scripts/website/build_site_data.py`.
          delete-branch: true