Skip to content

Commit d0e776a

Browse files
authored
Fix GitHub hosted CI runner bugs for router evaluation (#84)
* Try a different way to install uv * Fix the `--active` flag usage for `uv run` * Add more logging to track progress * Enable logging in the middle of the run * Enable subprocess logging * Fix pre-commit CI
1 parent 57cb8c0 commit d0e776a

2 files changed

Lines changed: 35 additions & 10 deletions

File tree

.github/workflows/pr-evaluation.yml

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@ on:
66

77
jobs:
88
evaluate-router:
9+
concurrency:
10+
group: evaluate-router-pr-${{ github.event.issue.number }}
11+
cancel-in-progress: false
12+
timeout-minutes: 120
913
if: >-
1014
github.event.issue.pull_request &&
1115
startsWith(github.event.comment.body, '/evaluate') &&
@@ -183,6 +187,20 @@ jobs:
183187
echo "Detected router submission: ${{ steps.detect.outputs.router }}"
184188
echo "Detected split: ${{ steps.detect.outputs.split }}"
185189
190+
- name: Set up Python
191+
if: ${{ steps.detect.outputs.router != '' }}
192+
uses: actions/setup-python@v5
193+
with:
194+
python-version: "3.11"
195+
196+
- name: Install uv
197+
if: ${{ steps.detect.outputs.router != '' }}
198+
run: |
199+
set -euo pipefail
200+
python -m pip install --upgrade pip
201+
python -m pip install uv
202+
uv --version
203+
186204
- name: Prepare dataset
187205
if: ${{ steps.detect.outputs.router != '' }}
188206
working-directory: base
@@ -191,7 +209,7 @@ jobs:
191209
# Prepare dataset from public repository
192210
# Uses base repo's script (safe - not from PR)
193211
echo "Preparing dataset..."
194-
mkdir -p "${{ github.workspace }}/dataset"
212+
mkdir -p dataset
195213
uv run python scripts/process_datasets/prep_datasets.py
196214
197215
- name: Copy PR prediction file to base workspace
@@ -221,17 +239,16 @@ jobs:
221239
id: evaluate
222240
working-directory: base
223241
env:
224-
ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
242+
ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/base/dataset
225243
run: |
226-
set -euo pipefail; trap 'cat evaluation_output.txt' EXIT
244+
set -euo pipefail
227245
# Uses base repo's evaluation script (safe - not from PR)
228246
BASE_SHA="${{ steps.pr.outputs.base_sha }}"
229247
uv run python automation/process_pr_submission.py \
230248
--pr "${{ steps.pr.outputs.number }}" \
231249
--router "${{ steps.detect.outputs.router }}" \
232250
--split "${{ steps.detect.outputs.split }}" \
233-
--base-ref "$BASE_SHA" > evaluation_output.txt 2>&1
234-
cat evaluation_output.txt
251+
--base-ref "$BASE_SHA" 2>&1 | tee evaluation_output.txt
235252
236253
- name: Post evaluation results as PR comment
237254
if: ${{ steps.detect.outputs.router != '' && steps.evaluate.outcome == 'success' }}

automation/process_pr_submission.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -454,23 +454,29 @@ def main(argv: Optional[list[str]] = None) -> int:
454454
)
455455

456456
if not args.skip_sync:
457-
run_command(["uv", "sync", "--locked"], cwd=worktree_path, capture=True)
457+
print("▶ Syncing dependencies with uv...", flush=True)
458+
run_command(["uv", "sync", "--locked"], cwd=worktree_path, capture=False)
459+
print("✔ Synced dependencies")
458460

459461
validation_cmd = [
460462
"uv",
461463
"run",
462-
"--activepython",
464+
"--active",
463465
"router_inference/check_config_prediction_files.py",
464466
args.router,
465467
args.split,
466468
"--check-generated-result",
467469
]
468-
validation_result = run_command(validation_cmd, cwd=worktree_path, capture=True)
470+
print("▶ Validating prediction/config files...", flush=True)
471+
validation_result = run_command(
472+
validation_cmd, cwd=worktree_path, capture=False
473+
)
474+
print("✔ Validated prediction files")
469475

470476
evaluation_cmd = [
471477
"uv",
472478
"run",
473-
"--activepython",
479+
"--active",
474480
"llm_evaluation/run.py",
475481
args.router,
476482
args.split,
@@ -479,9 +485,11 @@ def main(argv: Optional[list[str]] = None) -> int:
479485

480486
evaluation_logs = ""
481487
try:
488+
print("▶ Running evaluation...", flush=True)
482489
evaluation_result = run_command(
483-
evaluation_cmd, cwd=worktree_path, capture=True
490+
evaluation_cmd, cwd=worktree_path, capture=False
484491
)
492+
print("✔ Evaluated predictions")
485493
evaluation_logs = (evaluation_result.stdout or "") + (
486494
evaluation_result.stderr or ""
487495
)

0 commit comments

Comments
 (0)