Merge pull request #363 from ryanpetrello/leaderboard-python-wheel-bu… #53
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Leaderboard Management | |
| on: | |
| # Pull requests that touch a submission file run the validation job | |
| pull_request: | |
| paths: | |
| - "submissions/**/*-assessment.json" | |
| # Pushes to main that touch a submission or the generator script refresh the leaderboard data | |
| push: | |
| branches: | |
| - main | |
| paths: | |
| - "submissions/**/*-assessment.json" | |
| - "scripts/generate-leaderboard-data.py" | |
| # Manual runs, for testing | |
| workflow_dispatch: | |
| # Workflow-wide default token scope; each job declares its own permissions as well | |
| permissions: | |
| contents: read | |
| jobs: | |
| # Job 1 (pull requests only): validate a leaderboard submission | |
| validate: | |
| if: ${{ github.event_name == 'pull_request' }} | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read # check out the repository and fetch the PR commit | |
| pull-requests: write # post the validation result as a PR comment | |
| issues: write # github-script comments through the issues API (PRs are issues there) | |
| steps: | |
| # Outdated forks: the validation tools and schema always come from upstream. | |
| # A PR branch may live in a fork that has not been synced with upstream, and | |
| # validating with the fork's copy would apply an outdated schema. Upstream main | |
| # is therefore checked out for the agentready tools, and only the submission | |
| # file is taken from the PR. | |
| # See: https://github.com/ambient-code/agentready/pull/312 | |
| - name: Checkout upstream for validation tools | |
| uses: actions/checkout@v6 | |
| with: | |
| ref: main | |
| repository: ${{ github.repository }} | |
| fetch-depth: 0 | |
| - name: Fetch PR submission file | |
| env: | |
| PR_HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }} | |
| PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} | |
| run: | | |
| # Store the PR head commit under refs/remotes/pr/head so later steps can read the submission from it | |
| pr_remote="https://github.com/${PR_HEAD_REPO}.git" | |
| git fetch "$pr_remote" "${PR_HEAD_SHA}:refs/remotes/pr/head" | |
| # Python interpreter used by the steps that follow | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.12" | |
| - name: Install dependencies | |
| run: | | |
| # Install uv, create a virtualenv with it, then install this checkout into it in editable mode | |
| pip install uv | |
| uv venv | |
| . .venv/bin/activate | |
| uv pip install -e . | |
| - name: Extract submission details | |
| id: extract | |
| run: | | |
| # Find the submission added or modified by the PR (three-dot diff against main). | |
| # --diff-filter=d leaves out deleted files, which cannot be read from the PR head. | |
| # The pattern is anchored and the dot escaped, so only paths under submissions/ | |
| # that end in -assessment.json are accepted. Only the first match is validated. | |
| CHANGED_FILE=$(git diff --name-only --diff-filter=d origin/main...pr/head | grep -E '^submissions/.*-assessment\.json$' | head -1) | |
| if [ -z "$CHANGED_FILE" ]; then | |
| echo "No assessment file found in diff" | |
| exit 1 | |
| fi | |
| # Copy the submission out of the PR commit to a temp location, | |
| # so the file that gets validated is the submitted one and not main's | |
| git show "pr/head:${CHANGED_FILE}" > /tmp/submission.json | |
| # Parse JSON from the PR's submission file - values are stored in outputs, never executed | |
| REPO_URL=$(jq -r '.repository.url' /tmp/submission.json) | |
| CLAIMED_SCORE=$(jq -r '.overall_score' /tmp/submission.json) | |
| REPO_NAME=$(jq -r '.repository.name' /tmp/submission.json) | |
| RESEARCH_VERSION=$(jq -r '.metadata.research_version // "unknown"' /tmp/submission.json) | |
| # Outputs are written one key=value per line, so a value containing a line break | |
| # could add or override outputs. The submission is untrusted: reject such values. | |
| for value in "$CHANGED_FILE" "$REPO_URL" "$CLAIMED_SCORE" "$REPO_NAME" "$RESEARCH_VERSION"; do | |
| case "$value" in | |
| *$'\n'*|*$'\r'*) | |
| echo "::error::Submission contains a field with a line break; refusing to export it" | |
| exit 1 | |
| ;; | |
| esac | |
| done | |
| { | |
| echo "file=$CHANGED_FILE" | |
| echo "repo_url=$REPO_URL" | |
| echo "claimed_score=$CLAIMED_SCORE" | |
| echo "repo_name=$REPO_NAME" | |
| echo "research_version=$RESEARCH_VERSION" | |
| } >> "$GITHUB_OUTPUT" | |
| - name: Validate JSON schema | |
| run: | | |
| # The PR's submission file is checked with the validator installed from upstream, | |
| # so a fork carrying an outdated schema cannot cause a false validation failure | |
| . .venv/bin/activate | |
| agentready validate-report /tmp/submission.json | |
| - name: Detect repository host | |
| id: detect_host | |
| env: | |
| REPO_URL: ${{ steps.extract.outputs.repo_url }} | |
| run: | | |
| # Decide GitHub vs GitLab from the host part of the URL only. | |
| # A plain substring match would also accept URLs that merely contain the name, | |
| # e.g. https://gitlab.com/x/github.com-mirror or https://github.com.example.org/x/y. | |
| # Accepted forms: http(s)://<host>/<path> and git@<host>:<path>. | |
| github_re='^(https?://github\.com/|git@github\.com:)' | |
| gitlab_re='^(https?://gitlab\.com/|git@gitlab\.com:)' | |
| if [[ "$REPO_URL" =~ $github_re ]]; then | |
| echo "host=github" >> "$GITHUB_OUTPUT" | |
| elif [[ "$REPO_URL" =~ $gitlab_re ]]; then | |
| echo "host=gitlab" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "::error::Unsupported repository host in URL: $REPO_URL" | |
| exit 1 | |
| fi | |
| # Convert SSH/HTTPS URL to an HTTPS clone URL | |
| # git@<host>:<path>.git -> https://<host>/<path>.git | |
| # A trailing .git is removed first so clone_url carries exactly one .git suffix | |
| CLONE_URL=$(echo "$REPO_URL" | sed -E 's|^git@([^:]+):|https://\1/|' | sed 's|\.git$||') | |
| echo "clone_url=${CLONE_URL}.git" >> "$GITHUB_OUTPUT" | |
| echo "browse_url=$CLONE_URL" >> "$GITHUB_OUTPUT" | |
| - name: Verify repository exists and is public | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| REPO_URL: ${{ steps.extract.outputs.repo_url }} | |
| HOST: ${{ steps.detect_host.outputs.host }} | |
| CLONE_URL: ${{ steps.detect_host.outputs.clone_url }} | |
| run: | | |
| if [ "$HOST" = "github" ]; then | |
| # GitHub: use gh CLI for verification | |
| # Reduce the URL to <owner>/<repo> by dropping the SSH/HTTPS prefix and a trailing .git | |
| ORG_REPO=$(echo "$REPO_URL" | sed 's|git@github.com:||' | sed 's|https://github.com/||' | sed 's|\.git$||') | |
| if ! IS_PRIVATE=$(gh repo view "$ORG_REPO" --json isPrivate -q '.isPrivate'); then | |
| echo "::error::Repository $ORG_REPO does not exist or is not accessible." | |
| exit 1 | |
| fi | |
| # Fail closed: only an explicit "false" counts as public. | |
| # An empty or unexpected answer must not be reported as a public repository. | |
| case "$IS_PRIVATE" in | |
| false) | |
| echo "✅ Repository $ORG_REPO is public" | |
| ;; | |
| true) | |
| echo "::error::Repository $ORG_REPO is private." | |
| exit 1 | |
| ;; | |
| *) | |
| echo "::error::Could not determine visibility of $ORG_REPO (got '$IS_PRIVATE')." | |
| exit 1 | |
| ;; | |
| esac | |
| else | |
| # GitLab/other: verify repo is publicly accessible via git ls-remote | |
| if git ls-remote --exit-code "$CLONE_URL" HEAD > /dev/null 2>&1; then | |
| echo "✅ Repository is publicly accessible: $CLONE_URL" | |
| else | |
| echo "::error::Repository is not publicly accessible: $CLONE_URL" | |
| exit 1 | |
| fi | |
| fi | |
| - name: Verify submitter has access | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| REPO_URL: ${{ steps.extract.outputs.repo_url }} | |
| SUBMITTER: ${{ github.event.pull_request.user.login }} | |
| HOST: ${{ steps.detect_host.outputs.host }} | |
| run: | | |
| if [ "$HOST" != "github" ]; then | |
| # Other hosts: access cannot be checked from here, so flag the PR for manual review | |
| echo "::warning::Cannot verify submitter access for non-GitHub repos. Manual review required." | |
| echo "⚠️ Submitter access for non-GitHub repos must be verified manually by maintainers." | |
| exit 0 | |
| fi | |
| # GitHub: reduce the URL to <owner>/<repo>, then query the API | |
| ORG_REPO=$(echo "$REPO_URL" | sed -e 's|git@github.com:||' -e 's|https://github.com/||' -e 's|\.git$||') | |
| # Accepted when the collaborator lookup succeeds, or when the repository owner login equals the submitter | |
| if gh api "/repos/$ORG_REPO/collaborators/$SUBMITTER" 2>/dev/null; then | |
| echo "✅ $SUBMITTER is a collaborator on $ORG_REPO" | |
| elif [ "$(gh api "/repos/$ORG_REPO" -q '.owner.login')" = "$SUBMITTER" ]; then | |
| echo "✅ $SUBMITTER is the owner of $ORG_REPO" | |
| else | |
| echo "::error::$SUBMITTER does not have commit access to $ORG_REPO" | |
| exit 1 | |
| fi | |
| - name: Re-run assessment | |
| env: | |
| CLONE_URL: ${{ steps.detect_host.outputs.clone_url }} | |
| run: | | |
| . .venv/bin/activate | |
| # Clone the submitted repository and assess it with the tooling installed earlier | |
| echo "Cloning $CLONE_URL..." | |
| git clone "$CLONE_URL" /tmp/repo-to-assess | |
| echo "Running assessment..." | |
| agentready assess /tmp/repo-to-assess --output-dir /tmp/validation | |
| # Hand the fresh score and ruleset version to later steps through $GITHUB_ENV | |
| report=/tmp/validation/assessment-latest.json | |
| ACTUAL_SCORE=$(jq -r '.overall_score' "$report") | |
| ACTUAL_RESEARCH_VERSION=$(jq -r '.metadata.research_version // "unknown"' "$report") | |
| { | |
| echo "ACTUAL_SCORE=$ACTUAL_SCORE" | |
| echo "ACTUAL_RESEARCH_VERSION=$ACTUAL_RESEARCH_VERSION" | |
| } >> "$GITHUB_ENV" | |
| - name: Compare scores | |
| env: | |
| CLAIMED_SCORE: ${{ steps.extract.outputs.claimed_score }} | |
| CLAIMED_RESEARCH_VERSION: ${{ steps.extract.outputs.research_version }} | |
| run: | | |
| # Both scores are spliced into bc expressions below, so they must be plain numbers. | |
| # bc treats a word such as "null" as a variable with value 0, and malformed input | |
| # can leave DIFF empty, in which case the tolerance check below would not fire. | |
| # Fail closed on anything that is not a non-negative decimal number. | |
| number_re='^[0-9]+([.][0-9]+)?$' | |
| for score in "$ACTUAL_SCORE" "$CLAIMED_SCORE"; do | |
| if ! [[ "$score" =~ $number_re ]]; then | |
| echo "::error::Score is not a valid number: '$score'" | |
| exit 1 | |
| fi | |
| done | |
| # Absolute difference between the re-computed and the claimed score | |
| DIFF=$(echo "scale=2; if ($ACTUAL_SCORE - $CLAIMED_SCORE < 0) $CLAIMED_SCORE - $ACTUAL_SCORE else $ACTUAL_SCORE - $CLAIMED_SCORE" | bc) | |
| echo "Claimed: $CLAIMED_SCORE (ruleset: $CLAIMED_RESEARCH_VERSION)" | |
| echo "Actual: $ACTUAL_SCORE (ruleset: $ACTUAL_RESEARCH_VERSION)" | |
| # Differences of more than 2 points fail the submission | |
| if (( $(echo "$DIFF > 2" | bc -l) )); then | |
| echo "::error::Score mismatch: claimed $CLAIMED_SCORE, actual $ACTUAL_SCORE" | |
| exit 1 | |
| fi | |
| # Runs even when an earlier step failed, so the PR always gets a result comment | |
| - name: Post validation results | |
| if: always() | |
| uses: actions/github-script@v8 | |
| env: | |
| CLAIMED_SCORE: ${{ steps.extract.outputs.claimed_score }} | |
| ACTUAL_SCORE: ${{ env.ACTUAL_SCORE }} | |
| REPO_NAME: ${{ steps.extract.outputs.repo_name }} | |
| CLAIMED_RESEARCH_VERSION: ${{ steps.extract.outputs.research_version }} | |
| ACTUAL_RESEARCH_VERSION: ${{ env.ACTUAL_RESEARCH_VERSION }} | |
| with: | |
| script: | | |
| // SAFE: All values from environment variables | |
| const claimed = process.env.CLAIMED_SCORE || 'N/A'; | |
| const actual = process.env.ACTUAL_SCORE || 'N/A'; | |
| // Tolerance in hundredths of a point (2.00 points). | |
| const TOLERANCE_HUNDREDTHS = 200; | |
| const rawDiff = Math.abs(Number.parseFloat(actual) - Number.parseFloat(claimed)); | |
| // Either score missing or non-numeric gives NaN: no diff to report, and never a pass. | |
| const hasDiff = Number.isFinite(rawDiff); | |
| // Decide on an integer number of hundredths so floating-point noise cannot flip the | |
| // result and a diff such as 2.04 is not rounded down to a pass. | |
| const diffHundredths = hasDiff ? Math.round(rawDiff * 100) : Number.NaN; | |
| const withinTolerance = hasDiff && diffHundredths <= TOLERANCE_HUNDREDTHS; | |
| // One decimal place normally; two when the second decimal is significant. | |
| const diff = hasDiff ? (diffHundredths / 100).toFixed(diffHundredths % 10 === 0 ? 1 : 2) : 'N/A'; | |
| const status = withinTolerance ? '✅ **PASSED**' : '❌ **FAILED**'; | |
| const body = [ | |
| '## Leaderboard Validation', | |
| '', | |
| status, | |
| '', | |
| `**Claimed**: ${claimed}/100`, | |
| `**Verified**: ${actual}/100`, | |
| `**Diff**: ${diff} points (±2 tolerance)`, | |
| ].join('\n'); | |
| // Await the request so an API failure fails this step instead of being left as a floating promise. | |
| await github.rest.issues.createComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: context.issue.number, | |
| body, | |
| }); | |
| # Job 2: Update leaderboard data (after merge to main, or manual trigger) | |
| update: | |
| if: github.ref == 'refs/heads/main' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch') | |
| runs-on: ubuntu-latest | |
| # Run one update at a time: overlapping runs would each try to push their own | |
| # leaderboard commit, and the later push would be rejected as a non-fast-forward. | |
| concurrency: | |
| group: leaderboard-update | |
| cancel-in-progress: false | |
| permissions: | |
| contents: write # Commit and push updated leaderboard data | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v6 | |
| with: | |
| # Use the current tip of main rather than the triggering commit, so a run that | |
| # waited in the queue regenerates from, and pushes on top of, the latest state. | |
| ref: main | |
| - name: Set up Python | |
| uses: actions/setup-python@v6 | |
| with: | |
| python-version: '3.12' | |
| - name: Generate leaderboard data | |
| run: | | |
| python3 scripts/generate-leaderboard-data.py | |
| - name: Commit updated data | |
| run: | | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| # Commit and push only when docs/_data/leaderboard.json has changes | |
| if [ -n "$(git status --porcelain docs/_data/leaderboard.json)" ]; then | |
| git add docs/_data/leaderboard.json | |
| git commit -m "chore: update leaderboard data [skip ci]" | |
| git push | |
| fi | |