From 3cbcaba727eec1380a10bbda8e0d64830f3d18a4 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 13:48:48 +0800 Subject: [PATCH 01/13] [wip] flagos user tests --- .github/CODEOWNERS | 42 ++ .github/ISSUE_TEMPLATE/new_test_case.yml | 94 +++ .github/PULL_REQUEST_TEMPLATE.md | 37 ++ .github/scripts/detect_changed_repos.js | 65 +++ .github/workflows/nightly_integration.yml | 94 +++ .github/workflows/pr_validation.yml | 85 +++ .github/workflows/test_dispatch.yml | 104 ++++ flagos-user-tests/CONTRIBUTING.md | 76 +++ flagos-user-tests/README.md | 69 +++ flagos-user-tests/docs/getting_started.md | 99 ++++ flagos-user-tests/docs/test_format_spec.md | 177 ++++++ flagos-user-tests/repos.yaml | 45 ++ flagos-user-tests/resource_map.yaml | 99 ++++ flagos-user-tests/tests/flagcx/.gitkeep | 0 flagos-user-tests/tests/flaggems/.gitkeep | 0 .../tests/flagscale/hetero_train/.gitkeep | 0 .../inference/qwen3/demo_0_6b/.gitignore | 2 + .../inference/qwen3/demo_0_6b/README.md | 25 + .../qwen3/demo_0_6b/conf/demo_0_6b.yaml | 27 + .../demo_0_6b/conf/inference/demo_0_6b.yaml | 18 + .../inference/qwen3/demo_0_6b/demo_0_6b.yaml | 38 ++ .../demo_0_6b/gold_values/demo_0_6b.json | 12 + .../tests/flagscale/train/.gitkeep | 0 flagos-user-tests/tests/flagtree/.gitkeep | 0 .../tests/megatron-lm-fl/.gitkeep | 0 flagos-user-tests/tests/te-fl/.gitkeep | 0 flagos-user-tests/tests/vllm-fl/.gitkeep | 0 .../tests/vllm-plugin-fl/.gitkeep | 0 .../run_user_tests.cpython-312.pyc | Bin 0 -> 22466 bytes .../tools/generators/create_test_template.py | 233 ++++++++ flagos-user-tests/tools/resolve_matrix.py | 138 +++++ flagos-user-tests/tools/run_user_tests.py | 547 ++++++++++++++++++ .../tools/validators/lint_test_case.py | 157 +++++ .../tools/validators/validate_config.py | 159 +++++ .../tools/validators/validate_gold_values.py | 108 ++++ 35 files changed, 2550 insertions(+) create mode 100644 .github/CODEOWNERS create mode 100644 .github/ISSUE_TEMPLATE/new_test_case.yml create mode 100644 
.github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/scripts/detect_changed_repos.js create mode 100644 .github/workflows/nightly_integration.yml create mode 100644 .github/workflows/pr_validation.yml create mode 100644 .github/workflows/test_dispatch.yml create mode 100644 flagos-user-tests/CONTRIBUTING.md create mode 100644 flagos-user-tests/README.md create mode 100644 flagos-user-tests/docs/getting_started.md create mode 100644 flagos-user-tests/docs/test_format_spec.md create mode 100644 flagos-user-tests/repos.yaml create mode 100644 flagos-user-tests/resource_map.yaml create mode 100644 flagos-user-tests/tests/flagcx/.gitkeep create mode 100644 flagos-user-tests/tests/flaggems/.gitkeep create mode 100644 flagos-user-tests/tests/flagscale/hetero_train/.gitkeep create mode 100644 flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore create mode 100644 flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md create mode 100644 flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml create mode 100644 flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml create mode 100644 flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml create mode 100644 flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json create mode 100644 flagos-user-tests/tests/flagscale/train/.gitkeep create mode 100644 flagos-user-tests/tests/flagtree/.gitkeep create mode 100644 flagos-user-tests/tests/megatron-lm-fl/.gitkeep create mode 100644 flagos-user-tests/tests/te-fl/.gitkeep create mode 100644 flagos-user-tests/tests/vllm-fl/.gitkeep create mode 100644 flagos-user-tests/tests/vllm-plugin-fl/.gitkeep create mode 100644 flagos-user-tests/tools/__pycache__/run_user_tests.cpython-312.pyc create mode 100644 flagos-user-tests/tools/generators/create_test_template.py create mode 100644 flagos-user-tests/tools/resolve_matrix.py create mode 100644 
flagos-user-tests/tools/run_user_tests.py create mode 100644 flagos-user-tests/tools/validators/lint_test_case.py create mode 100644 flagos-user-tests/tools/validators/validate_config.py create mode 100644 flagos-user-tests/tools/validators/validate_gold_values.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..ce1bbdd --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,42 @@ +# FlagOS DevOps - Code Owners + +# Default owners for everything +* @flagos-ai/devops-team + +# CI/CD workflows +.github/ @flagos-ai/devops-team + +# Shared actions +actions/ @flagos-ai/devops-team + +# === User Tests === + +# FlagScale test cases +flagos-user-tests/tests/flagscale/ @flagos-ai/flagscale-team + +# FlagGems test cases +flagos-user-tests/tests/flaggems/ @flagos-ai/flaggems-team + +# FlagCX test cases +flagos-user-tests/tests/flagcx/ @flagos-ai/flagcx-team + +# FlagTree test cases +flagos-user-tests/tests/flagtree/ @flagos-ai/flagtree-team + +# vLLM-FL test cases +flagos-user-tests/tests/vllm-fl/ @flagos-ai/vllm-team + +# vLLM-plugin-FL test cases +flagos-user-tests/tests/vllm-plugin-fl/ @flagos-ai/vllm-team + +# TE-FL test cases +flagos-user-tests/tests/te-fl/ @flagos-ai/te-team + +# Megatron-LM-FL test cases +flagos-user-tests/tests/megatron-lm-fl/ @flagos-ai/megatron-team + +# Experimental test cases +flagos-user-tests/tests/experimental/ @flagos-ai/devops-team + +# Validation tools +flagos-user-tests/tools/ @flagos-ai/devops-team diff --git a/.github/ISSUE_TEMPLATE/new_test_case.yml b/.github/ISSUE_TEMPLATE/new_test_case.yml new file mode 100644 index 0000000..899c5b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/new_test_case.yml @@ -0,0 +1,94 @@ +name: New Test Case Submission +description: Submit a new test case for FlagOS repositories +title: "[Test Case] " +labels: ["new-test-case"] +body: + - type: dropdown + id: target-repo + attributes: + label: Target Repository + description: Which FlagOS repository is this test case for? 
+ options: + - FlagScale + - FlagGems + - FlagCX + - FlagTree + - vLLM-FL + - vLLM-plugin-FL + - TE-FL + - Megatron-LM-FL + validations: + required: true + + - type: dropdown + id: test-type + attributes: + label: Test Type + description: What type of test is this? + options: + - train + - inference + - hetero_train + - unit + - integration + - benchmark + validations: + required: true + + - type: input + id: model-name + attributes: + label: Model Name + description: Name of the model being tested (if applicable) + placeholder: e.g., llama2, mixtral, deepseek + + - type: textarea + id: description + attributes: + label: Test Case Description + description: Describe what this test case validates + placeholder: | + This test case validates ... + validations: + required: true + + - type: textarea + id: config + attributes: + label: Configuration + description: Paste the YAML configuration for the test case + render: yaml + validations: + required: true + + - type: textarea + id: gold-values + attributes: + label: Gold Values + description: Paste the expected gold values (JSON format) + render: json + + - type: textarea + id: environment + attributes: + label: Environment Requirements + description: Describe the hardware/software requirements + placeholder: | + - GPU: 8x A100 80GB + - CUDA: 12.1 + - Python: 3.10 + validations: + required: true + + - type: checkboxes + id: checklist + attributes: + label: Submission Checklist + options: + - label: I have tested this test case locally + required: true + - label: I have included gold values (if applicable) + - label: I have added a README.md with test description + required: true + - label: My YAML configuration follows the schema specification + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..4f688bf --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,37 @@ +## Test Case PR + +### Target Repository + +- [ ] FlagScale +- [ ] 
FlagGems +- [ ] FlagCX +- [ ] FlagTree +- [ ] vLLM-FL +- [ ] vLLM-plugin-FL +- [ ] TE-FL +- [ ] Megatron-LM-FL + +### Test Type + +- [ ] train +- [ ] inference +- [ ] hetero_train +- [ ] unit +- [ ] integration + +### Description + + + +### Environment Requirements + +- GPU: +- CUDA: +- Python: + +### Checklist +- [ ] YAML configuration passes schema validation +- [ ] Gold values are included (if applicable) +- [ ] README.md is present for each test case +- [ ] Test case has been verified locally +- [ ] No sensitive data (tokens, passwords, private paths) in configs diff --git a/.github/scripts/detect_changed_repos.js b/.github/scripts/detect_changed_repos.js new file mode 100644 index 0000000..08ea150 --- /dev/null +++ b/.github/scripts/detect_changed_repos.js @@ -0,0 +1,65 @@ +// Detect which repos have changed test cases. +// +// Outputs (via core.setOutput): +// changed_cases — JSON array of case paths (manual single-case dispatch) +// changed_repos — JSON object {repo, task, model} (manual repo dispatch or _none_) +// changed_repos_list — JSON array of repo names (auto-detected from PR/push) +// +// Called from workflow via: +// uses: actions/github-script@v7 +// with: +// script: | +// const run = require('./.github/scripts/detect_changed_repos.js'); +// await run({ github, context, core }); + +module.exports = async ({ github, context, core }) => { + const inputCase = process.env.INPUT_CASE || ''; + const inputRepo = process.env.INPUT_REPO || ''; + const inputTask = process.env.INPUT_TASK || ''; + const inputModel = process.env.INPUT_MODEL || ''; + + // Manual dispatch — single case + if (inputCase) { + core.setOutput('changed_cases', JSON.stringify([inputCase])); + return; + } + + // Manual dispatch — by repo + if (inputRepo) { + core.setOutput('changed_repos', JSON.stringify({ + repo: inputRepo, + task: inputTask, + model: inputModel, + })); + return; + } + + // Auto-detect from changed files + let files = []; + if (context.eventName === 'pull_request') { 
+ const resp = await github.paginate( + github.rest.pulls.listFiles, + { owner: context.repo.owner, repo: context.repo.repo, pull_number: context.issue.number } + ); + files = resp.map(f => f.filename); + } else { + const resp = await github.rest.repos.compareCommits({ + owner: context.repo.owner, repo: context.repo.repo, + base: context.payload.before, head: context.payload.after, + }); + files = resp.data.files.map(f => f.filename); + } + + // Extract unique repos from changed paths + const repos = new Set(); + for (const f of files) { + const m = f.match(/^flagos-user-tests\/tests\/([^/]+)\//); + if (m && m[1] !== 'experimental') repos.add(m[1]); + } + + if (repos.size === 0) { + core.setOutput('changed_repos', JSON.stringify({ repo: '_none_' })); + } else { + core.setOutput('changed_repos_list', JSON.stringify([...repos])); + } +}; diff --git a/.github/workflows/nightly_integration.yml b/.github/workflows/nightly_integration.yml new file mode 100644 index 0000000..9b51668 --- /dev/null +++ b/.github/workflows/nightly_integration.yml @@ -0,0 +1,94 @@ +name: Nightly Integration Test - User Tests + +on: + schedule: + - cron: "0 2 * * *" + workflow_dispatch: + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + discover-cases: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.resolve.outputs.matrix }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml + + - name: Discover all test cases and resolve runner labels + id: resolve + working-directory: flagos-user-tests + run: | + python3 -c " + import json, os, sys + sys.path.insert(0, 'tools') + from run_user_tests import list_test_resources + from pathlib import Path + + root = Path('.') + resources_list = list_test_resources(root) + + matrix_entries = [] + for entry in resources_list: + matrix_entries.append({ + 'case_path': entry['case_path'], 
+ 'runner_labels': json.dumps(entry['runner_labels']), + }) + + if not matrix_entries: + matrix_entries.append({ + 'case_path': '_none_', + 'runner_labels': json.dumps(['ubuntu-latest']), + }) + + matrix = {'include': matrix_entries} + output = json.dumps(matrix) + print(f'Matrix: {output}') + with open(os.environ['GITHUB_OUTPUT'], 'a') as f: + f.write(f'matrix={output}\n') + " + + run-tests: + needs: discover-cases + if: ${{ !contains(needs.discover-cases.outputs.matrix, '_none_') }} + runs-on: ${{ fromJson(matrix.runner_labels) }} + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.discover-cases.outputs.matrix) }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install runner dependencies + run: pip install pyyaml + + - name: Run test case + run: python tools/run_user_tests.py --case ${{ matrix.case_path }} + + notify: + needs: run-tests + if: always() + runs-on: ubuntu-latest + steps: + - name: Generate summary + run: | + echo "## Nightly Integration Test Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Run: ${{ github.run_number }}" >> $GITHUB_STEP_SUMMARY + echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml new file mode 100644 index 0000000..0083b57 --- /dev/null +++ b/.github/workflows/pr_validation.yml @@ -0,0 +1,85 @@ +name: PR Validation - User Tests + +on: + pull_request: + branches: [main] + paths: + - "flagos-user-tests/**" + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml jsonschema + + # Get the actual list of changed files in the PR 
(github.event.pull_request.changed_files is just a count) + - name: Get changed files + id: changed + uses: actions/github-script@v7 + with: + script: | + const files = await github.paginate( + github.rest.pulls.listFiles, + { owner: context.repo.owner, repo: context.repo.repo, pull_number: context.issue.number } + ); + const changed = files + .map(f => f.filename) + .filter(f => f.startsWith('flagos-user-tests/')) + .map(f => f.replace('flagos-user-tests/', '')); + core.setOutput('files', changed.join(',')); + + # Step 1: Schema validation — only validate changed files + - name: Validate YAML/JSON Schema + run: | + python tools/validators/validate_config.py \ + --changed-files "${{ steps.changed.outputs.files }}" + + # Step 2: Required fields check + - name: Check Required Fields + run: python tools/validators/lint_test_case.py --strict + + # Step 3: Gold values format validation + - name: Validate Gold Values + run: python tools/validators/validate_gold_values.py + + # Step 4: Documentation completeness check + - name: Check Documentation + run: | + errors=0 + # Skip sub-config directories (conf/train/data etc.) + SUB_CONFIG_DIRS="conf train inference data" + for dir in $(find tests -mindepth 3 -maxdepth 5 -type d); do + dirname=$(basename "$dir") + # Skip sub-config directories + skip=false + for sub in $SUB_CONFIG_DIRS; do + if [ "$dirname" = "$sub" ]; then skip=true; break; fi + done + if [ "$skip" = "true" ]; then continue; fi + + # If the directory contains .yaml files, check for README.md + if ls "$dir"/*.yaml 1>/dev/null 2>&1; then + if [ ! 
-f "$dir/README.md" ]; then + echo "ERROR: Missing README.md in $dir" + errors=$((errors + 1)) + fi + fi + done + if [ $errors -gt 0 ]; then + echo "Found $errors test case directories without README.md" + exit 1 + fi + echo "All test case directories have README.md" diff --git a/.github/workflows/test_dispatch.yml b/.github/workflows/test_dispatch.yml new file mode 100644 index 0000000..185a9b8 --- /dev/null +++ b/.github/workflows/test_dispatch.yml @@ -0,0 +1,104 @@ +name: Test Dispatch - User Tests + +on: + push: + branches: [main] + paths: + - "flagos-user-tests/tests/**" + pull_request: + branches: [main] + paths: + - "flagos-user-tests/tests/**" + workflow_dispatch: + inputs: + repo: + description: "Target repository (e.g., flagscale, flaggems)" + required: false + type: string + task: + description: "Task type (train/inference/hetero_train)" + required: false + type: string + model: + description: "Model name (e.g., mixtral, deepseek)" + required: false + type: string + case: + description: "Specific test case YAML path (relative to flagos-user-tests/)" + required: false + type: string + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + detect-changes: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.resolve.outputs.matrix }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml + + - name: Detect changed repos + id: detect + uses: actions/github-script@v7 + env: + INPUT_CASE: ${{ inputs.case }} + INPUT_REPO: ${{ inputs.repo }} + INPUT_TASK: ${{ inputs.task }} + INPUT_MODEL: ${{ inputs.model }} + with: + script: | + const run = require('./.github/scripts/detect_changed_repos.js'); + await run({ github, context, core }); + + - name: Resolve resources to matrix + id: resolve + working-directory: flagos-user-tests + run: | + python tools/resolve_matrix.py \ + 
--changed-cases '${{ steps.detect.outputs.changed_cases }}' \ + --changed-repos '${{ steps.detect.outputs.changed_repos }}' \ + --changed-repos-list '${{ steps.detect.outputs.changed_repos_list }}' + + run-tests: + needs: detect-changes + if: ${{ needs.detect-changes.outputs.matrix != '' && !contains(needs.detect-changes.outputs.matrix, '_none_') }} + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.detect-changes.outputs.matrix) }} + runs-on: ${{ fromJson(matrix.runner_labels) }} + container: + image: ${{ matrix.container_image }} + options: ${{ matrix.container_options }} + volumes: ${{ fromJson(matrix.container_volumes) }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install runner dependencies + run: pip install pyyaml + + - name: Run user tests + run: | + ARGS="" + if [ -n "${{ matrix.case_path }}" ]; then + ARGS="--case ${{ matrix.case_path }}" + else + ARGS="--repo ${{ matrix.repo }}" + [ -n "${{ matrix.task }}" ] && ARGS="$ARGS --task ${{ matrix.task }}" + [ -n "${{ matrix.model }}" ] && ARGS="$ARGS --model ${{ matrix.model }}" + fi + python tools/run_user_tests.py $ARGS diff --git a/flagos-user-tests/CONTRIBUTING.md b/flagos-user-tests/CONTRIBUTING.md new file mode 100644 index 0000000..2c606b2 --- /dev/null +++ b/flagos-user-tests/CONTRIBUTING.md @@ -0,0 +1,76 @@ +# Contributing to FlagOS User Tests + +Thank you for contributing test cases to the FlagOS ecosystem! + +## How to Submit a Test Case + +### Step 1: Generate a Template + +Use the built-in generator to create a properly structured test case: + +```bash +# FlagScale training test case +python tools/generators/create_test_template.py \ + --repo flagscale \ + --type train \ + --model \ + --name + +# Other repositories +python tools/generators/create_test_template.py \ + --repo \ + --name +``` + +### Step 2: Complete the Test Case + +1. **Edit the YAML config** with your actual test parameters +2. **Add gold values** from a verified local run (JSON format) +3. 
**Complete the README.md** with: + - Description of what the test validates + - Environment requirements (GPU, CUDA, Python) + - Manual execution instructions + +### Step 3: Validate Locally + +```bash +python tools/validators/validate_config.py +python tools/validators/validate_gold_values.py +python tools/validators/lint_test_case.py --strict +``` + +### Step 4: Submit a Pull Request + +1. Fork this repository +2. Create a feature branch: `git checkout -b add-test//` +3. Add your test case files +4. Commit and push +5. Open a Pull Request using the provided template + +## Test Case Requirements + +- Each test case must be in its own directory +- Each directory must contain: + - At least one `.yaml` configuration file + - A `README.md` with test documentation + - Gold values JSON file (for regression tests) +- No sensitive data (tokens, passwords, private paths) in any files +- YAML must pass schema validation +- Gold values must contain numeric arrays + +## Code Review + +- PRs are reviewed by the respective team CODEOWNERS +- CI must pass before merge +- At least one approval from a maintainer is required + +## Experimental Test Cases + +If your test case covers a new or unstable feature: +- Place it under `tests/experimental/` +- It will only run in nightly integration tests +- It will not block PR merges + +## Questions? + +Open an issue using the "New Test Case" template or contact the DevOps team. diff --git a/flagos-user-tests/README.md b/flagos-user-tests/README.md new file mode 100644 index 0000000..9ae4f32 --- /dev/null +++ b/flagos-user-tests/README.md @@ -0,0 +1,69 @@ +# FlagOS User Tests + +User-perspective test cases for FlagOS repositories. Each test case defines its own setup, run, and verification — exactly as a real user would operate. + +## How It Works + +``` +User submits test case YAML: + setup: [pip install flagscale] + run: [flagscale train mixtral --config ./conf/xxx.yaml] + verify: {log_path: ..., gold_values_path: ...} + +CI runner: + 1. 
cd + 2. Execute setup commands + 3. Execute run commands + 4. Extract metrics from log + 5. Compare against gold values → PASS/FAIL +``` + +Users have full control — the runner does NOT call internal repo scripts. + +## Quick Start + +```bash +# Generate template +python tools/generators/create_test_template.py \ + --repo flagscale --type train --model llama2 --name tp2_pp1 + +# Validate +python tools/validators/validate_config.py +python tools/validators/validate_gold_values.py +python tools/validators/lint_test_case.py --strict + +# Run locally +python tools/run_user_tests.py \ + --case tests/flagscale/train/llama2/tp2_pp1/tp2_pp1.yaml +``` + +See [docs/getting_started.md](docs/getting_started.md) for the full guide. + +## Test Case Structure (FlagScale Example) + +``` +tests/flagscale/train/mixtral/tp2_pp1_ep2/ +├── tp2_pp1_ep2.yaml # Test case: setup → run → verify +├── conf/ # FlagScale configs (user provides) +│ ├── tp2_pp1_ep2.yaml +│ └── train/tp2_pp1_ep2.yaml +├── gold_values/ # Expected metrics +│ └── tp2_pp1_ep2.json +└── README.md +``` + +## Supported Repositories + +FlagScale, FlagGems, FlagCX, FlagTree, vLLM-FL, vLLM-plugin-FL, TE-FL, Megatron-LM-FL + +## CI Workflows (in `../.github/workflows/`) + +| Workflow | Trigger | Description | +|---|---|---| +| PR Validation | Pull Request | Format, lint, gold values checks | +| Test Dispatch | Push/PR | Run user-defined setup → run → verify | +| Nightly | Daily 02:00 UTC | All test cases | + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md). diff --git a/flagos-user-tests/docs/getting_started.md b/flagos-user-tests/docs/getting_started.md new file mode 100644 index 0000000..1025909 --- /dev/null +++ b/flagos-user-tests/docs/getting_started.md @@ -0,0 +1,99 @@ +# Getting Started + +## Overview + +`flagos-user-tests` manages **user-perspective** test cases for FlagOS repositories. Each test case defines its own setup, run, and verification commands — exactly as a real user would operate. 
+ +## Quick Start + +### 1. Generate a template + +```bash +# FlagScale training test +python tools/generators/create_test_template.py \ + --repo flagscale --type train --model llama2 --name tp2_pp1 + +# Generic test +python tools/generators/create_test_template.py \ + --repo flaggems --name my_operator_test +``` + +### 2. Edit the generated files + +The test case YAML defines the user workflow: + +```yaml +# tests/flagscale/train/llama2/tp2_pp1/tp2_pp1.yaml +meta: + repo: flagscale + task: train + model: llama2 + case: tp2_pp1 + description: "LLaMA2 training with TP=2, PP=1" + +resources: + gpu: A100-80GB + gpu_count: 8 + +setup: + - pip install flagscale # user installs the package + +run: + - flagscale train llama2 --config ./conf/tp2_pp1.yaml # user runs training + +verify: + log_path: ".../stdout.log" # where to find output + gold_values_path: ./gold_values/tp2_pp1.json # expected metrics +``` + +Also edit the FlagScale config files (`conf/*.yaml`) and fill in gold values from a verified run. + +### 3. Validate locally + +```bash +python tools/validators/validate_config.py +python tools/validators/validate_gold_values.py +python tools/validators/lint_test_case.py --strict +``` + +### 4. Run locally (optional) + +```bash +python tools/run_user_tests.py \ + --case tests/flagscale/train/llama2/tp2_pp1/tp2_pp1.yaml +``` + +### 5. Submit a PR + +CI will automatically: +1. Validate format (PR Validation workflow) +2. Run your test case on real hardware (Test Dispatch workflow) + +## How the Runner Works + +`run_user_tests.py` is a **generic executor**: + +``` +┌─────────────┐ ┌──────────────────────────────────────┐ +│ Test Case │ ──▶ │ 1. cd │ +│ YAML │ │ 2. Execute setup commands │ +│ │ │ 3. Execute run commands │ +│ │ │ 4. Find log file (glob pattern) │ +│ │ │ 5. Extract metrics from log │ +│ │ │ 6. Compare against gold values │ +└─────────────┘ └──────────────────────────────────────┘ +``` + +It does **not** call any internal repo scripts. 
Users have full control over: +- What to install (`setup`) +- How to run (`run`) +- What to verify (`verify`) +- Machine requirements (`resources`) — mapped to runner labels via `resource_map.yaml` + +## CI Workflows + +| Workflow | Trigger | Description | +|---|---|---| +| PR Validation | Pull Request | Format/lint/gold-values checks | +| Test Dispatch | Push to main / PR | Runs user-defined setup → run → verify | +| Nightly | Daily 02:00 UTC | All test cases across all repos | diff --git a/flagos-user-tests/docs/test_format_spec.md b/flagos-user-tests/docs/test_format_spec.md new file mode 100644 index 0000000..a916e0d --- /dev/null +++ b/flagos-user-tests/docs/test_format_spec.md @@ -0,0 +1,177 @@ +# Test Format Specification + +## Core Concept: User-Perspective Test Cases + +Every test case is a **self-contained YAML file** that defines the complete workflow from a **user's perspective**: + +```yaml +meta: # What is this test? +resources: # Hardware requirements (platform, device, device_count) +setup: # How to install? (user's commands) +run: # How to run? (user's commands) +verify: # How to check? (gold values comparison) +``` + +The runner (`run_user_tests.py`) simply executes these user-defined commands. It does NOT call any internal repo scripts — giving users full control and matching real usage scenarios. 
+ +## Test Case YAML Format + +### Complete Example (FlagScale) + +```yaml +meta: + repo: flagscale + task: train + model: mixtral + case: tp2_pp1_ep2 + description: "Mixtral MoE training with TP=2, PP=1, EP=2" + +resources: + platform: cuda + device: A100-80GB + device_count: 8 + +env: + CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7" + +setup: + - pip install flagscale + +run: + - flagscale train mixtral --config ./conf/tp2_pp1_ep2.yaml + +verify: + log_path: "tests/functional_tests/train/mixtral/test_results/tp2_pp1_ep2/logs/details/host_0_localhost/*/default_*/attempt_0/*/stdout.log" + gold_values_path: ./gold_values/tp2_pp1_ep2.json + rtol: 1e-5 + atol: 0 +``` + +### Complete Example (Generic) + +```yaml +meta: + repo: flaggems + case: my_operator_test + description: "Test custom operator correctness" + +setup: + - pip install flaggems + +run: + - pytest -v tests/test_my_operator.py + +# No verify step — pytest exit code determines pass/fail +``` + +### Field Reference + +| Field | Type | Required | Description | +|---|---|---|---| +| `meta.repo` | string | Yes | Target FlagOS repository name | +| `meta.task` | string | No | Task type (train/inference/hetero_train) | +| `meta.model` | string | No | Model name | +| `meta.case` | string | No | Case name (for filtering) | +| `meta.description` | string | Yes | What this test validates | +| `resources` | object | No | Hardware requirements | +| `resources.platform` | string | No | Chip platform: `cuda`, `metax`, `ascend` (default: `cuda`) | +| `resources.device` | string | No | Device type (e.g. `A100-40GB`, `C500`, `Ascend910B`) | +| `resources.device_count` | int | No | Number of devices required | +| `env` | object | No | Environment variables | + +### Resource Resolution + +The `resources` field drives CI decisions via `resource_map.yaml` (platform-based): + +1. **Runner selection**: `resources.platform` + `resources.device` -> platform-specific runner labels +2. 
**Container image**: `resources.platform` + `meta.repo/task` -> platform-specific Docker image +3. **Container options**: `resources.platform` -> device passthrough flags (`--gpus all`, `--device /dev/davinci_all`, etc.) + +Supported platforms: + +| Platform | Vendor | Devices | Status | +|---|---|---|---| +| `cuda` | NVIDIA | A100, H100, H800 | Active | +| `metax` | MetaX (Muxi) | C500 | Planned | +| `ascend` | Huawei | Ascend910B, Ascend910C | Planned | + +The test job runs inside the platform-resolved Docker container with device access. + +### Field Reference (continued) + +| Field | Type | Required | Description | +|---|---|---|---| +| `setup` | list[str] | No | Shell commands for environment setup | +| `run` | list[str] | Yes | Shell commands to execute the test | +| `verify.log_path` | string | No | Path to output log (supports glob patterns) | +| `verify.gold_values_path` | string | No | Path to gold values JSON file | +| `verify.gold_values` | object | No | Inline gold values (alternative to file) | +| `verify.rtol` | float | No | Relative tolerance (default: 1e-5) | +| `verify.atol` | float | No | Absolute tolerance (default: 0) | + +### Working Directory + +All commands execute with the **test case directory** as the working directory. So `./conf/tp2_pp1_ep2.yaml` resolves relative to where the test case YAML lives. + +## Gold Values Format + +### Numeric (default) + +```json +{ + "lm loss:": { + "values": [11.17587, 11.16908, 10.41927] + } +} +``` + +- Keys are metric names extracted from log files +- Values are numeric arrays +- Comparison uses `rtol` / `atol` similar to `numpy.allclose` +- `log_path` supports glob patterns for timestamp directories + +### Text + +```json +{ + "inference_output": { + "type": "text", + "pattern": "output\\.outputs\\[0\\]\\.text=(?:\"(.+?)\"$|'(.+?)'$)", + "values": [ + " Lina. I'm a 22-year", + " the same as the president of the United Nations." 
+ ] + } +} +``` + +- Set `"type": "text"` to enable text comparison +- `"pattern"` is a regex with capture group(s) to extract text from log lines + - If multiple groups (e.g. alternation), the first non-None group is used +- Values are compared with exact string match + +## FlagScale Test Case Directory Structure + +``` +tests/flagscale/train/mixtral/tp2_pp1_ep2/ +├── tp2_pp1_ep2.yaml # Test case definition (setup/run/verify) +├── conf/ +│ ├── tp2_pp1_ep2.yaml # FlagScale experiment config (Hydra) +│ └── train/ +│ └── tp2_pp1_ep2.yaml # Training parameters +├── gold_values/ +│ └── tp2_pp1_ep2.json # Expected metrics +└── README.md +``` + +The user runs: `pip install flagscale && flagscale train mixtral --config ./conf/tp2_pp1_ep2.yaml` + +## README Requirements + +Each test case directory must have a `README.md` with: +1. **Description** section +2. **Environment** section + +## Experimental Test Cases + +Place under `tests/experimental/` for gray-stage tests (nightly only, non-blocking). diff --git a/flagos-user-tests/repos.yaml b/flagos-user-tests/repos.yaml new file mode 100644 index 0000000..fdde8e1 --- /dev/null +++ b/flagos-user-tests/repos.yaml @@ -0,0 +1,45 @@ +# FlagOS target repository configuration +# +# Note: Each test case defines its own setup/run/verify workflow (user perspective). +# This file only records basic repository info for CI repo-level filtering and issue templates. 
+ +repositories: + flagscale: + url: https://github.com/FlagOpen/FlagScale.git + default_branch: main + description: Large-scale distributed training framework + + flaggems: + url: https://github.com/FlagOpen/FlagGems.git + default_branch: main + description: GPU-accelerated math library + + flagcx: + url: https://github.com/FlagOpen/FlagCX.git + default_branch: main + description: Cross-chip communication library + + flagtree: + url: https://github.com/FlagOpen/FlagTree.git + default_branch: main + description: Tree-structured computation library + + vllm-fl: + url: https://github.com/FlagOpen/vLLM-FL.git + default_branch: main + description: LLM inference engine + + vllm-plugin-fl: + url: https://github.com/FlagOpen/vLLM-plugin-FL.git + default_branch: main + description: vLLM plugin system + + te-fl: + url: https://github.com/FlagOpen/TransformerEngine-FL.git + default_branch: main + description: Transformer Engine + + megatron-lm-fl: + url: https://github.com/FlagOpen/Megatron-LM-FL.git + default_branch: main + description: Megatron-LM fork diff --git a/flagos-user-tests/resource_map.yaml b/flagos-user-tests/resource_map.yaml new file mode 100644 index 0000000..6cc25a1 --- /dev/null +++ b/flagos-user-tests/resource_map.yaml @@ -0,0 +1,99 @@ +# Maps test case resource requirements to GitHub Actions runner labels and container images. +# +# Architecture: platform-based multi-vendor support +# resources.platform -> platforms. -> runner labels, container images, options +# +# Example test case YAML: +# resources: +# platform: cuda +# device: A100-40GB +# device_count: 1 +# +# Resolution chain: +# 1. resources.platform -> platforms.cuda +# 2. platforms.cuda.device_labels["A100-40GB"] -> runner labels +# 3. platforms.cuda.container_images["flagscale/inference"] -> Docker image +# 4. 
platforms.cuda.container_options -> Docker runtime flags + +# ============================================================================= +# Platforms: each vendor/chip family is a platform +# ============================================================================= +platforms: + + # --------------------------------------------------------------------------- + # NVIDIA CUDA platform + # --------------------------------------------------------------------------- + cuda: + description: "NVIDIA CUDA GPUs (A100, H100, H800, etc.)" + + # Device type -> self-hosted runner labels + device_labels: + A100-40GB: [self-hosted, Linux, X64, gpu-a100-40gb] + A100-80GB: [self-hosted, Linux, X64, gpu-a100-80gb] + H100-80GB: [self-hosted, Linux, X64, gpu-h100-80gb] + H800-80GB: [self-hosted, Linux, X64, gpu-h800-80gb] + + # Default runner labels when device type not found + default_labels: [self-hosted, Linux, X64] + + # Container images: "/" -> Docker image + container_images: + flagscale/train: "localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721" + flagscale/inference: "localhost:5000/flagscale-inference:dev-cu128-py3.12-20260302102033" + flagscale/hetero_train: "localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721" + + # Container runtime options + container_options: "--gpus all --shm-size=500g --user root --ulimit nofile=65535:65535" + + # Container volume mounts (host:container) + container_volumes: + - /home/flagscale_cicd/docker/docker_build/docker_data:/home/gitlab-runner/data + - /home/flagscale_cicd/docker/docker_build/docker_tokenizers:/home/gitlab-runner/tokenizers + + # --------------------------------------------------------------------------- + # MetaX (Muxi) platform — placeholder for future integration + # --------------------------------------------------------------------------- + metax: + description: "MetaX (Muxi) GPUs (C500, etc.)" + + device_labels: + C500: [self-hosted, Linux, X64, metax-c500] + + default_labels: [self-hosted, 
Linux, X64, metax] + + container_images: {} + # flagscale/train: "registry.example.com/flagscale-train:metax-..." + # flagscale/inference: "registry.example.com/flagscale-inference:metax-..." + + container_options: "--device /dev/mxgpu_all --shm-size=500g --user root" + + container_volumes: [] + + # --------------------------------------------------------------------------- + # Ascend (Huawei) platform — placeholder for future integration + # --------------------------------------------------------------------------- + ascend: + description: "Huawei Ascend NPUs (910B, 910C, etc.)" + + device_labels: + Ascend910B: [self-hosted, Linux, aarch64, ascend-910b] + Ascend910C: [self-hosted, Linux, aarch64, ascend-910c] + + default_labels: [self-hosted, Linux, aarch64, ascend] + + container_images: {} + # flagscale/train: "registry.example.com/flagscale-train:ascend-..." + + container_options: "--device /dev/davinci_all --shm-size=500g --user root" + + container_volumes: [] + +# ============================================================================= +# Global defaults +# ============================================================================= + +# Default platform when resources.platform is not specified +default_platform: cuda + +# Fallback runner labels when nothing matches +default_labels: [self-hosted] diff --git a/flagos-user-tests/tests/flagcx/.gitkeep b/flagos-user-tests/tests/flagcx/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flaggems/.gitkeep b/flagos-user-tests/tests/flaggems/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flagscale/hetero_train/.gitkeep b/flagos-user-tests/tests/flagscale/hetero_train/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore new file mode 100644 index 0000000..2301c87 --- /dev/null 
+++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore @@ -0,0 +1,2 @@ +FlagScale/ +outputs/ diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md new file mode 100644 index 0000000..b38f61b --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md @@ -0,0 +1,25 @@ +# demo_0_6b + +## Description + +FlagScale inference demo using Qwen3-0.6B model with vLLM backend. +Runs 4 prompts with greedy decoding (temperature=0, max_tokens=10) and verifies output text against gold values. + +## Environment + +- GPU: 1x A100 40GB +- CUDA: 12.1+ +- Python: 3.12 +- vLLM: 0.10.1.dev + +## How to Run + +```bash +git clone https://github.com/FlagOpen/FlagScale.git && cd FlagScale && pip install . +flagscale inference qwen3 --config ./conf/demo_0_6b.yaml +``` + +## Gold Values + +Uses text-type gold values to verify inference output. +Greedy decoding (temperature=0) produces deterministic output, so text comparison is exact match. 
diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml new file mode 100644 index 0000000..0f15416 --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml @@ -0,0 +1,27 @@ +defaults: + - _self_ + - inference: demo_0_6b + +experiment: + exp_name: qwen3 + exp_dir: ./outputs/${experiment.exp_name} + task: + type: inference + backend: vllm + entrypoint: flagscale/inference/inference_llm.py + runner: + hostfile: null + cmds: + before_start: source /root/miniconda3/bin/activate flagscale-inference + envs: + VLLM_PLUGINS: "fl" + VLLM_USE_FLASHINFER_SAMPLER: 0 + VLLM_LOGGING_LEVEL: "INFO" + CUDA_VISIBLE_DEVICES: 0 + CUDA_DEVICE_MAX_CONNECTIONS: 1 + +action: run + +hydra: + run: + dir: ${experiment.exp_dir}/hydra diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml new file mode 100644 index 0000000..d941f2b --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml @@ -0,0 +1,18 @@ +llm: + model: /share/project/models/Qwen/Qwen3-0.6B + trust_remote_code: true + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + gpu_memory_utilization: 0.9 + seed: 1234 + +generate: + prompts: [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + sampling: + max_tokens: 10 + temperature: 0.0 diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml new file mode 100644 index 0000000..7f997c7 --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml @@ -0,0 +1,38 @@ +meta: + repo: flagscale + task: inference + model: qwen3 + 
case: demo_0_6b + description: > + Qwen3-0.6B inference demo using vLLM backend with FlagScale CLI. + Runs 4 prompts with greedy decoding (temperature=0) and verifies output text. + +resources: + platform: cuda + device: A100-40GB + device_count: 1 + +env: + CUDA_VISIBLE_DEVICES: "0" + VLLM_PLUGINS: "fl" + VLLM_USE_FLASHINFER_SAMPLER: "0" + VLLM_LOGGING_LEVEL: "INFO" + CUDA_DEVICE_MAX_CONNECTIONS: "1" + +setup: + - git clone https://github.com/FlagOpen/FlagScale.git && cd FlagScale && pip install . + +run: + - flagscale inference qwen3 --config ./conf/demo_0_6b.yaml + - | + pid_file="./outputs/qwen3/inference_logs/pids/host_0_localhost.pid" + if [ -f "$pid_file" ]; then + pid=$(cat "$pid_file") + echo "Waiting for inference process $pid to complete..." + while kill -0 "$pid" 2>/dev/null; do sleep 2; done + echo "Inference process completed." + fi + +verify: + log_path: "./outputs/qwen3/inference_logs/host_0_localhost.output" + gold_values_path: ./gold_values/demo_0_6b.json diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json new file mode 100644 index 0000000..2a28edd --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json @@ -0,0 +1,12 @@ +{ + "inference_output": { + "type": "text", + "pattern": "output\\.outputs\\[0\\]\\.text=(?:\"(.+?)\"$|'(.+?)'$)", + "values": [ + " Lina. I'm a 22-year", + " the same as the president of the United Nations.", + " Paris. 
The capital of France is also the capital", + " not just a technological challenge but a profound transformation of" + ] + } +} diff --git a/flagos-user-tests/tests/flagscale/train/.gitkeep b/flagos-user-tests/tests/flagscale/train/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flagtree/.gitkeep b/flagos-user-tests/tests/flagtree/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/megatron-lm-fl/.gitkeep b/flagos-user-tests/tests/megatron-lm-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/te-fl/.gitkeep b/flagos-user-tests/tests/te-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/vllm-fl/.gitkeep b/flagos-user-tests/tests/vllm-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/vllm-plugin-fl/.gitkeep b/flagos-user-tests/tests/vllm-plugin-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tools/__pycache__/run_user_tests.cpython-312.pyc b/flagos-user-tests/tools/__pycache__/run_user_tests.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05fdd462b2ee932f1644c0ed136243d22786ae4d GIT binary patch literal 22466 zcmb_^Yj7LKncxiGPXZub1Rvsvq(p)uAyJZLi_v2xQWEvBWQ($6$&$Gc1|&g&06har zA_6ANR<;5rsuNjuE#Y&#hBnR_K6@`rZSKNcU2Wv#cGp+AAE3cR(1cf|tX;L6xTlak{7b>#v_*_xEc2+-|o}@YtLGbL^QRiuwpYD=|aVHfmsv-_TvLtMW}zD{GKpAm0r6b!Lj~W-U=0YlS~s)WO=}&k%J+T?(WI zv5#8Vx^L*O)U&%-hn&MY0iz4bY0^fh-w<_=n%H`P@&J@OYGOU`SIsl9UdZ#ZJ}BXb z94sxCX@Hja%oMbxhYdhZBjkwXnv__x62m^2q&BqxMnAg^%3z;$Y%5@DU>T@kJHd|Q z&<1G%wjI($IBC2frvq{tNo`n96~YR=-2r(`sx-v36VjUhMeVdn?d*hhwy<5_pss9- zwz9z+T@=NcG*ookyVA0tsTs|TCfGaOsHLcj(+Ospk8-{I^!3SDDivj!RFqFKkuV?S znebRRmVmF5@$lG%VTOxNCHYt?$;G04$YMDWj*M5KnHaz`d^A4V8%ZWoP!MokKKArE z20lk)V@zs1oMPDMC}i;c7KZ5s;MDX~KQo@Z#iWu9))bD%0WL~vV!{a);HRPqHkyb) 
zl}s15w3BD1q8vXJjih2Xqd|g*n@%WbqPL@w=~R@UAzj(B)XgXt8=X-KM#iI&8&ESp z9ZyxeK9-EL%*}9oI?7utm&T!!P+20%F??)tDn7$V)$!seirr(O%afDPe4hE+xmTGO zbUO*nKY!s8jB7k@2`6Ttx2Y(X2*<^)l7{jTE;g0oLky|rMl?FbSBDo`lp=je?Ai za?loBGGDBAgk+R@zYyYGsW5+I*JP57#{V^SK$j5#L%Ikb1Q8>3)$$u8m-9hd&^;4Q zjSKoXu3N!68BKAq$jFW83@;csSioE&0^6+em>~vqMLiy(ar4d4gVbG3L=&STnrnKB zn$=`9W7I2}J4PKftA#IWkh*po(sc-V?PZARQ&3WTCe)avw2XcN@N9lq`dz7%9L^dt zhSw=p^M+>DsKRL2T#}+P#*FS=sis*|#>CTiPME2THlxQaWdeJ&=}B4Tgp6szF2}1O ztCd@^Ii#sOypFn))&c%u7!w-)W;M5{b}A(wTT}2?cKLuEF#eT>EKnqyZ

x?bGXd+q2| zcGuMqyt})O_P-cCaZOGP9u58mCpKsjG+b0LM1d-&Amn&hhEsxpkA}I(xL^oR;XWzo zN8`zGO3;sS$>}M<@;vTaCpa$21PVLb~*5jiUur^0X& za0x*VLlqh4aRrcm;p=y&ck|<6F1mY)OHRNhygMG7nNE*SCwG%mVoKaRl6)^YR>=0U zdy73sXpD-JIwDRcKZ4UZLZ*;wgi3e7AO9Y_=BQ8VsMhXMUH5A@a<1&l3vKh$xf^q* zign!|IlWokuiV?R1D_fwe_$!L7|Uzlw%oH6_I=k;^6klNOwVZu}`j!<)mr4f_SsBiS$3AhJY_!49FK)>`zhQ2MNv>>g*y?xC%XsRWK}wFX8fAZgU1g5G^QDYtLfHdkjIfg!)O6}^n zd>F{Ott+(aXs|8QNy5&cxEDmE2_l1t)4dP=9`A(LS&GI3gt`V(G)re_7h1cIx|2Rh#{R8)oS> zA{b$Vor=Yy9G(S&4k}Fv1~O(mhueD4DDFPE`Z+hkn4}Uc=fSiFOw+;^%H#TkT?QYq z7yd1p&UFIh5%}Y;!)uPJ)KL!C+^Kbk>u!2Jo$D+)+UADH=1_JvEIAe(d0ol5BdZsz z&X4PTtNwxf6K@~7cj&DnCI7(vOQrfV*@3d7e*SA^r+=w#u`b8|!r5N2Bf&cf!9Rvt zDA;Htsp)WB&>>$a=(to8;$e74aC?;>7eokAr{GP98YCY;DRnE8f`uGSX9>IoZRctbhT zK_WEH>hFBHx|L+iGTE!rOmfzSpTYRND+`%NrRB zYbBCSETwE{DkjC8G09^lw+fi8kTlj-GUB#bd&ZWrV{2ifbZpUH8)>gSV};h$Wo)l& zSVzYGy5ip%h5j9Ef3-){NpQGo_l>Q8Loc_Hh81zg`Y-6wmCZd$Da;x6Ee*sf z;|BIlqsq0*>1p5)(y-=k43cr^-l7lcad5*pP^<^WfnvQXoSSN7eXr}@u*mhu;{-j- z{3}PwziFiMfJdLvjpCux5WWJWHUQfa?)DTErc$ z7X+r;DT!$mlb8)i+Qy^tc(M&p+)8qBwoQ!;i?;PhnB>A6!#3&2#JagCo~pNEsd1)j z!#Np*<`66wi8KRZHpr+WQ(+#Cx*p)C`P4{q6gV^9Tx-ut{Y#w@YRFxCV9vOOLamxJedUxc4Gur z1heauo~F_C|vz?FP^v(|FMwl^)WS@Qb| zno`5Ati9}QU3zKprTodl>5}(A*79pdbJ^y(dtv@Uaogbi;rk~(a21=+uG!9&1KYA^ zAnRjmU3Ru)ZF$R`f3dW^??Y>!1YfT2Ekp{>RVaF&f8F1hJ6!Syi@xr{t{+BLc9jmC zEIx4xdGXWqr&_JOu41AZTM5EPR&UPtp|y3rW#?Ob;V{5#JC(w$6n! 
z4?Tg3i`q6sS3Hy}K)8<1LVLkmi2Sg9`9^8)@nYXVap3tP^W|SUUjTV!TW9Tdth;>+ zFX#Fer(fyM>+-4hBFo#~9lbZZ%Iq(>_t$D&ao3TRmscY9{U5kKz^0y~nRTW;e=Hxq zH}G9k!JYdm3`k%%j0d#UMA|AqTOFjW^;@>}{*fYc@t4lw$}mkecU7LJDVuNMcG22e z)U|S;Sp;S;GReX!kKAGfZ8UK+qGnl;mm_S@KSN}Ricu*D#iy2L<9l>QbDQQtClI5) zM@!Pp2ue38MdQKJ867YHns+5eil(Z^=mheno1To0)nw?5_C4*p5`%`4&;(JQZvs)F7Bfr{Q=2 z6fo(NEb73B!W89LaZeHV5*un5VLu^iQ0`gCeHH%rS$NG+j~qR(wk>rpb{8EzdH0_M z3L1#a4gJdD&T7|9_66O-@WP2~e9hFf?rK`vwzw^?E$m+&UKw9=oyr=@R@Vb-^QyHu zPnWFQ*KMw%r>o$55bR$K_7}Upw9;3sKfGo;Qg#O(xI0$e9Us~{Dptzrsn{r+U0k(h z)OErc2Z}u-oUAw47`H(x$CX4@?p0VdOH@@NM+3>G^{24(EXdkr813{-GYO{&fTRUr;wcetFE#ZUE{S%VWDhy_& z#29Fmh>=IJrpQV#*}5Xy)kIZxmE8s2I$Zq%{Rvo9`c)>CD$9pvkABhaVX3AP`$xZG z0I6oMpcqEe6sX5QkRY0Gr5J4g$!QMG4t32_@o)-T*UvoIk ztC)D82M{NkuMyb!nb2+mJ#s4=8yio7oliytXPCNMz16{#j7E~N2(U>;&d_d*Y+)xu zw35+Lse(bP1UZRdXhm)L=yU>Xn@KPZiq^T!HaJ)XBhsm>u4p$P;&-d;5?Gyzlb0(A z?N%kK5riiAWJ1(RVo@|GUbqOujQV&{1{TE%^^mAgsguD3B3n(`Hm1V}5MwGz$Y4)- z@`|ro=;NYI8deE$LO*J;CUxT#}?R`R(i0Za}W)DdbXv^;o9sYlK($-5zRRh2e zfYd7Jkcfb-Ld!IVs!)(~iJ3TUyhz9iL{OwF!8o0`k${DjHeMpcgrX@#(#EIJ9NEu+ z?F1uf;~)?ZNSuOIMMZ+ht)=a~y}isZ)&(o5H>jzySt2868$NOAxo0G1E^V_QIP#GY zrY#qrJ1^%rNY3-5p&}+wup);8tDE3VH&{^KMUF55lDPVQ%bf)0(k zf|(DGMn{ljxcy`ZP$&dO5sgO4_)QR9!TQAQgDk?>33Vbuc~OEqdlBE!64EG z_htOHkf9wRQiF-`h$uhUB@7beA7b$Vd=aL$9t+Yp!2%Z9Qg*c?E~K9k>^PT%B6;}Z zKZOl_j{5WuXi9I%9?#y)j%Rz`rU$ z*t*M0I8VCdYRejatEDV~ih;7#-|e68Ul`3@2i9|LuQ!9iSEu&J8{6*mb8p zJ9alYpDfk2u34BLx|d)0iEVF0?Q2TeClz)s(IF1=qKp z1Ri}Mv}W60c6+m@5!|T;?<#1Q-D~zIHh}l6*;*BFFMt>8m^IV(bw^8Xa(Vpzz?x&I zXc~g?ELwJ~>2{W_w%=Z+VUFfo|M_tjWexGj*8I>n)TI4u+Bei?_@GHYw8O~7A(i_> zc-0nE{}vc$nB90fMbgWzJQYJDeA zxwPr2ngu~{G2lOWglH;ZQ`KmER}t%H(6*s4V^xH#Sv9_3BjMHMRoZ1}Yjy0ZqhQ2w zMJI=e-#3Cd178f1W5FLRZUSc8T#J#evZPzLM((p5&H_`;w|pKxxrK5mImFUjL$x*) z>#B?nOT74$5~hTD)@>Q%XJ%ay-&lLbv@w_T-#Tl}SSQfvxanc*&m!luD*KKEwA0#`b<6RAy%g+wt)7k)~Y+>=3T0_?#nnqjpSF`)xa}P zpV}d}D`U?nb!8kG=OM_rb1_plfrigbPsS;a7+9WIu!GcbEh$tglLs)>DXsQo922|b zoNAa+iD+9;UIQCYuz0tCX=D{k+pI6uE8~auYct-A4+JMrsNh3HN&jp^rUAy?2{ai{ 
zjZJeVuyK{$sn=1e{FaQL)3DnTG~2onj-;E0!9#TzT#7acV46GKk~H#HOau?}2bYu?X=zBHW)~k|l%mm&AHRCMhH*qdx)%io+Nu8CR<9M$IBH zZ82czfAfFfNjHiZ&Y=g?rWU64klN4u1`jRJ>4j65z@4B@G7*i4z6{)6$mDk8YY)E2 zCtCBaJn7J-*Gh|H^Cz{*Yx2K|ID<#2-+tTBEBj3~)CIDLYD)NN3(@`R=3 z!kDc?p$5F1fSCoOeLQ)c8^vS;4~}6`NJ_;fqrmUS!P}BMi}j04Kc8Zu=fJH_#bYVa zA&Kz2NPEPbL;RBj!}Jsjwj5$42{u-_3z2z6gN|S(ONXc$%|vyIQEnXZaF{RZQ(;a8 zBQ_7D2Eh^m-_aN%=D9h9vEq~iZ=6Cd+>9TjNrFwXx)FCo(TGIsLwYg^9C}9(k0YT@ z1{Cc>)sYqqqM?sFi1lbE`7s_HuZZ|p^F_3~+!$iU*JJqOe+f$mEJMH@2Smke?pn#Z zYi{tN)lv3rf8g1%>e*4;`P52V$#XbserWZ4e4u~j;9n#k9C&v1z_V)yhI0-1(;o(U z*DhZxzBE?4Jf1yWb~it8x39X}*W5ckZf5fC*H0JBtId52W_8ZG(_3r+R4coRzLRUt zQ)Pbxu9L%bN z$u|}*mK=M4Gq$=3uX%RObgt}hLII~|Vc^>{xu;eg?ZD4l-FJ`9A6>IHm)%Xd@m2TE ztf`vzk!f4m+q`sj@#2$TcNo6WB^zZEHTEx)kZ zeYmvqNGWhMd*-3l3+?^R$$WpQIaJ_QoA;Ld`{su}M%{L(aJw*3tUtJB>xV|81aV@a zYoW1dXW zbb0Y|?q+_b}w*fcILv~k}s4!_0Zi&W@GoVwlH1X|4h;M?3(l9Lzs}p zzUAP`mrIQ&vL_ezee3KaTf+le>#D6aul)rm=G~OP{SzN$@~j8;EuUV=lmh3nb!AiC zUHiO!A+cuK@yOaBEvo19mrB-d5V}+;&z7v6bA#*t?!tlP{VVJTo;CmZRh?(vGsSn4c7Nh1HQ8z+Mjpm&xVX3{U?(t4je5g?kc{n!E1x>j}a1ZV|u(7r~+jP zmL@!>4!97NYcD965o*%|j&>5~BZqK0{tcXt;C445ot)|-wYb=-^DXbFT?$NHV7ue! zJ0Y{`RIPI9z|kA4oc;uk!KNppaO$%rR*(N=&H(s4MScA>QwmOY@saa69c###ZqiWu zWCnO=WEz2cra3Qgylxq~8nQ}F8Jehu={xfo&6dy#=a|t*(D{sZOK8Q{D5I62e*~sH zMcG?zMRja|zxr?JXYyJ8;?AtdU4SdIVqTo^RdL9%6gs<+XH=uOIzLuioh|9xCu4B) z0Zf5acX~_i1uCj=2(ebQ9-(fgVYl;S>s!uk%EIvuWiA?cCR(D}tWG zvkN@`z|$EH%~Tk?uF=(moWUHh)f5*&I7bZMl z{>TGs$Evj>KfGoQ7A}APrFULh`O1gA=)h3X)tS8;PNc6cel;I1`Mabe>0@_Gj(uxz z%^fT}zp{U|_bB)(mfilGVewFQ@S($<)4geW&6M|*8oCODA2#fP?5$EgInSF7uQlX5 zOM&h}^uxeDL}hc`JvVs$#gO~gOc(_La78hmX++aPW zKgW8i_)W_`1`)gl2mj=)39a>;g@zeg5*akj2rh|0E*+<{1QaKfNpJm zU~ONuwy#-tlpU_DdEE>Oveq>-1B{QuefO*LU(JO}4(3D0&irxIU2cD1W@P*qv z*zRhpbkr&{P_02pa88s;YQ>T`?{Ev`^r>iyk=uwq;CQi&-2G5@qetA@MqvxR9)>$z zaJfwK(HoJ=h6J-5G~^mq9c@KZTiM_K zz~8y*?<{upuN*J=j}~>Fhem&~xn~(T;DZ2J3(N(TAJ9)4@EM@pwecBL%kp#de!y#i$VN01X3(RVYq>4RTM>9wD_Sv_UFnf? 
zk-q{j;;xKEb^^i{f*a6pFN!7<)zPj@9In$-vDDD?b>Y1b0BCq2qoCrN4%B{@)=&7zcmH!pPkDbvKB;PnO(I z&7FH_^_LwjMN`Wo@Ae1Y9jji*Ij~Y!@}8PI4-|cOc78T@1&l=K2DNT&&D#p&CF_%- z2rZTaarXA&?IN>hIaKn4km4!ZeRr?TU&}oOwkLbn+{uSlSJ}~8G_`^>=M4}O(+KP{ zp1!%K%l?iB{_a(O_b>fD^Hvb+o-bSL?jD>!`0X#{cD~v3TF>jDlC=vQroQzw$cGKR z;2~&hF6x@d*0Mpm!EKKm1x~V`RNW;q%KZSadT44=Hn>hHl&!BG))`$!ksgSgqa5#0 zm-MjUf})f~il=Arh%utW71$|g&;n``bOts1HE&ITq*1LKboIO^Lvi(6z`~_Tr7ncs z0uH2}4RAo;hV&0Eeuln*s>94+GI9gBcGThE&1jB^O@cSIMA7F&>QyKdsa1g-A&@+# zwH-oc{mAY^XlXvu2DcEU6cUw|ylB9eeYxA(nAj-jBgOhqVQ{Rw<7A0Zg7?5SuR&mtOp+#gY4E(%RGp?!N$*JQCL{5CKcB zlk)78sp|JpPTB6d`@;MSMPFB;tz_?&=)?8vZEe|0po!SNFt~7F)zVnd%?*^ns(WVs z%mNQqPh0EUiHBC_;-#|Fw`5tgNh%Fo^k2d-b3eh?kKqMp#a4@v$C)9Uy2!N!KD*|m zin#j#xq_>17$UIhEMIWdJ!;wh<_oXAP~3TNWpAnFNO|MuCwi^TP@%MX!)JF^6I4WY z*1z1mv%)2EV4NRy+uN#kV{k;Q!B4>+tJsRSfH0~c5I3Gs@XXq) zdZ-O`)`3n8eFHW7|p!s4BrBrKZKAD=@< zZ&XuAKczt~-T>i%yBV%J6o?^0j5m{@sNq436n|Gj=BSY96atWT?M-ABCWM3T-TC8}=4;UZQaS$lD0=yWO=Ayfc!g z*WLc5Ba27!+e_|_>>!BlbuEuTpuatTJ7>siN{;rduIy_IItzEnK|1 zuYzMbtU7!^zE_;aQT0;OBZr<)H1lF9d;@HK^-GQ^JF2$oOOis{FL@>uZMK{Oc(knh zJ&$~f$nE-E4-jt^5?WHj(}VL?`&%1A#sHoH7OAQDa3-Oj}m&;W{ zd0zPT>WHwujDDjp!sqzAsNC9rKcvbd1a7x}c2sN6F#L3oV#i?BPV&Fs3rF25tK8!a zYpYXE{r|eQz`n(wQnffxyH_CXH9}5}wF@~-kW*v*!mOz7i~s(yW>srY)tY9NFO7l= zYHU$^@33CbMHzq>1C1r{TsxF;GF4{{{lr)=+L}N+N~^Ge|BSraKaV~$Mh@Crg`R=i-s-Pl`}uUo#n1kp z82MS4VHGa~iq)53aMA|xC#TZep8<~uRDOkP{B#wuXyLCg!G*agEP^6%x(Vw+3mLKg zNJ0s5hk;B0BCgo(lQH~V4>W(_H@VN9HcG$ql=f6}iKjM$O0l%Lw^zCkmOlAqxEO|7 zeK0e@?c*djL+X+4v`Ihwg+CJ{UjT<26*9k9EfY*T;i>^JgV1n^149E@5^mY1xany6 zOie~nhTXs-5;<%Gm&kOT8NP6y5q)%m;I@N5PXSHhz+?;_(;+SeAq<`Y42tLyf)Zr9 z`NYMG7cTZQmz2hCc2`R8l|Y8cFNw*&4b}s$i+^KX6csMEjM>!Gflw`oL z$lo6Xd?GKw$pr3yjci7R6oLRBO|)11igYO%#DG2 z6CV}oj&WlkmnKrr;3p^8!Yn%ixg<@n5Fq^Kg*w8m$pgz1=OMzask9hq%olx!G7uB= z?DXUm+==Ajx4v|ElTvhP*WscQEjs9C&gJp-1iXS4bf+Qh&|#e-LrVBg6eC6E6B$y$ zfVQD1%dKH@0$w$(l)6dK6>(+A{(IyoZ@~1;QR{Z^1AE7+y`${u2e*zo!`w+w`01?= zbpBPHKW8d-o+;}5Yr3;#ou!&KNYYMhNIOK*4p-C6#jYbH?WmGgHo438=g?`S;Zu}b 
zK{F^!|50&kgH)PuCMQ4qYb=%w^w990BU~K%**a$``bfd}Y)?FR>ONIU4KlskU zW%ft0AH?o^{-*K$#**t?)&QP2ci6iV^AkB=fnKZYUNh}lH(BqR=1mK(JJv@XU2mWJ zvvYUYh5bv177wk}G0W`SsYgcZH)sB6X5G^O_Rqj;fmbeM&7c+%UEbFm;`Puy%YkKU zvHsAS?eMzIL73xb@0^uzzuJuGRSbXMcvF_TM=_NIK@asAG>b?cI@x9QKn=D#-)^xpPd)?g0 zU*Qj2t=k(x6Xh@2gOZp0x1R*9)k1jw?EJ}!5q_lxG!&n?;BflYNN&d~6Zr$HzF^VY zUD*EKz_R|`GlfITw~CgdYq~$EUVB*A8NWI7^`XU`*=K*DV{&15iFPH^2-t07iIEXd zwA{kgihBsx0(Qc6K_T7tBD2T4G_xJr+jD9fq7*i^H>j7ea zL3{6Dg@WiZPlA=y{ed6PJjRqt4|R+_PJg29rx}pkDkN-Z7Q>F97(R8FgpXi~_b+%B zB8!3S(N%rZV?7l5I8&F{XUd@C;4cbC)U< zh9Hsdf9I?izrXJ>Nvn)%F3|M0d`E@Ca9IjhhQ;su!;eXNC841@y2^b|e4=M)SJ_fm z(PHQe*2ey$4VOto(cuQeW)LT51@VsD%<3)wbJ#ZYZ%I1gSgT)jLThv mxT dict: + """Generate a FlagScale user-perspective test case YAML.""" + return { + "meta": { + "repo": "flagscale", + "task": task_type, + "model": model, + "case": name, + "description": "TODO: describe what this test validates", + }, + "resources": { + "gpu": "A100-80GB", + "gpu_count": 8, + }, + "env": { + "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", + "CUDA_DEVICE_MAX_CONNECTIONS": "1", + }, + "setup": [ + "pip install flagscale", + ], + "run": [ + f"flagscale {task_type} {model} --config ./conf/{name}.yaml", + ], + "verify": { + "log_path": f"tests/functional_tests/{task_type}/{model}/test_results/{name}/logs/details/host_0_localhost/*/default_*/attempt_0/*/stdout.log", + "gold_values_path": f"./gold_values/{name}.json", + "rtol": 1e-5, + "atol": 0, + }, + } + + +def create_flagscale_experiment_config(model: str, name: str, task_type: str) -> dict: + """Generate Hydra experiment config for flagscale CLI.""" + return { + "defaults": ["_self_", {task_type: name}], + "experiment": { + "exp_name": name, + "exp_dir": f"tests/functional_tests/{task_type}/{model}/test_results/{name}", + "task": { + "type": task_type, + "backend": "megatron", + "entrypoint": "flagscale/train/megatron/train_gpt.py", + }, + "runner": {"ssh_port": None}, + "envs": { + "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", + 
"CUDA_DEVICE_MAX_CONNECTIONS": "1", + }, + }, + "action": "run", + "hydra": {"run": {"dir": "${experiment.exp_dir}/hydra"}}, + } + + +def create_flagscale_train_params() -> dict: + """Generate training params sub-config.""" + return { + "defaults": ["data"], + "system": { + "tensor_model_parallel_size": 2, + "pipeline_model_parallel_size": 1, + "sequence_parallel": True, + "use_distributed_optimizer": True, + "precision": {"bf16": True}, + "logging": {"log_interval": 1}, + "checkpoint": {"no_save_optim": True, "no_save_rng": True, "save_interval": 100000}, + }, + "model": { + "num_layers": 2, + "hidden_size": 4096, + "num_attention_heads": 32, + "seq_length": 2048, + }, + } + + +def create_generic_test_case(repo: str, name: str) -> dict: + """Generate a generic user-perspective test case YAML.""" + return { + "meta": { + "repo": repo, + "case": name, + "description": "TODO: describe what this test validates", + }, + "resources": {}, + "setup": [ + f"pip install {repo.replace('-', '_')}", + ], + "run": [ + "pytest -v", + ], + } + + +def create_readme(repo: str, task_type: str, model: str, name: str) -> str: + if repo == "flagscale": + return f"""# {name} + +## Description + +TODO: Describe what this test case validates. + +## Environment + +- GPU: 8x A100 80GB +- CUDA: 12.1+ +- Python: 3.10 + +## How to Run + +```bash +pip install flagscale +flagscale {task_type} {model} --config ./conf/{name}.yaml +``` + +## Gold Values + +TODO: Describe expected values and tolerance. +""" + return f"""# {name} + +## Description + +TODO: Describe what this test case validates. 
+ +## Environment + +- Python: 3.10 + +## How to Run + +```bash +pip install {repo} +pytest -v +``` +""" + + +def dump_yaml(data: dict, path: Path): + os.makedirs(path.parent, exist_ok=True) + with open(path, "w") as f: + yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False) + + +def main(): + parser = argparse.ArgumentParser(description="Generate test case template") + parser.add_argument("--repo", required=True, choices=VALID_REPOS) + parser.add_argument("--type", default="train") + parser.add_argument("--model", default="") + parser.add_argument("--name", required=True) + parser.add_argument("--output", default=".") + args = parser.parse_args() + + root = Path(args.output) + + if args.repo == "flagscale": + if not args.model: + print("FlagScale test cases require --model"); sys.exit(1) + + case_dir = root / "tests" / args.repo / args.type / args.model / args.name + + # Main test case YAML (user-perspective) + tc = create_flagscale_test_case(args.type, args.model, args.name) + dump_yaml(tc, case_dir / f"{args.name}.yaml") + + # Hydra experiment config + ec = create_flagscale_experiment_config(args.model, args.name, args.type) + dump_yaml(ec, case_dir / "conf" / f"{args.name}.yaml") + + # Training params sub-config + tp = create_flagscale_train_params() + dump_yaml(tp, case_dir / "conf" / "train" / f"{args.name}.yaml") + + # Gold values + gold = {"lm loss:": {"values": [0.0] * 10}} + gold_path = case_dir / "gold_values" / f"{args.name}.json" + os.makedirs(gold_path.parent, exist_ok=True) + with open(gold_path, "w") as f: + json.dump(gold, f, indent=2) + + # README + readme = create_readme(args.repo, args.type, args.model, args.name) + with open(case_dir / "README.md", "w") as f: + f.write(readme) + + print(f"Created FlagScale test case at: {case_dir}") + print(f" {args.name}.yaml — test case (setup/run/verify)") + print(f" conf/{args.name}.yaml — FlagScale experiment config") + print(f" conf/train/{args.name}.yaml — training 
parameters") + print(f" gold_values/{args.name}.json — expected metrics") + print(f" README.md") + else: + case_dir = root / "tests" / args.repo / args.name + tc = create_generic_test_case(args.repo, args.name) + dump_yaml(tc, case_dir / f"{args.name}.yaml") + + readme = create_readme(args.repo, "", "", args.name) + with open(case_dir / "README.md", "w") as f: + f.write(readme) + + print(f"Created test case at: {case_dir}") + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/resolve_matrix.py b/flagos-user-tests/tools/resolve_matrix.py new file mode 100644 index 0000000..c13c8f0 --- /dev/null +++ b/flagos-user-tests/tools/resolve_matrix.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +"""Resolve test case resources into a GitHub Actions matrix. + +Reads detection outputs (changed_cases / changed_repos / changed_repos_list) +and produces a JSON matrix with runner_labels, container_image, container_options, +and container_volumes per test case entry. + +Usage (from workflow): + python tools/resolve_matrix.py \ + --changed-cases '${{ steps.detect.outputs.changed_cases }}' \ + --changed-repos '${{ steps.detect.outputs.changed_repos }}' \ + --changed-repos-list '${{ steps.detect.outputs.changed_repos_list }}' +""" + +import argparse +import json +import os +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) +from run_user_tests import ( + list_test_resources, + resolve_container_image, + resolve_container_options, + resolve_runner_labels, +) + +import yaml + + +def make_entry(case_path: str, meta: dict, resources: dict, resource_map_path: Path) -> dict: + """Build a matrix entry with runner labels and per-platform container config.""" + labels = resolve_runner_labels(resources, resource_map_path) + image = resolve_container_image( + meta.get("repo", ""), meta.get("task", ""), + resources, resource_map_path, + ) + opts = resolve_container_options(resources, resource_map_path) + return { + "case_path": case_path, 
+ "repo": meta.get("repo", ""), + "task": meta.get("task", ""), + "model": meta.get("model", ""), + "runner_labels": json.dumps(labels), + "container_image": image, + "container_options": opts["container_options"], + "container_volumes": json.dumps(opts["container_volumes"]), + } + + +def make_empty_entry(**kwargs) -> dict: + """Build a placeholder entry with defaults.""" + return { + "case_path": "", "repo": "", "task": "", "model": "", + "runner_labels": json.dumps(["self-hosted"]), + "container_image": "", "container_options": "", + "container_volumes": json.dumps([]), + **kwargs, + } + + +def resource_entry_to_matrix(entry: dict, repo: str = "", task: str = "", model: str = "") -> dict: + """Convert a list_test_resources entry to a matrix entry.""" + return { + "case_path": entry["case_path"], + "repo": repo or "", "task": task or "", "model": model or "", + "runner_labels": json.dumps(entry["runner_labels"]), + "container_image": entry.get("container_image", ""), + "container_options": entry.get("container_options", ""), + "container_volumes": json.dumps(entry.get("container_volumes", [])), + } + + +def main(): + parser = argparse.ArgumentParser(description="Resolve test resources to CI matrix") + parser.add_argument("--changed-cases", default="") + parser.add_argument("--changed-repos", default="") + parser.add_argument("--changed-repos-list", default="") + parser.add_argument("--root", default=".", help="Root directory of flagos-user-tests") + args = parser.parse_args() + + root = Path(args.root) + resource_map_path = root / "resource_map.yaml" + matrix_entries = [] + + if args.changed_cases: + cases = json.loads(args.changed_cases) + for case_path in cases: + p = root / case_path if not Path(case_path).is_absolute() else Path(case_path) + if p.exists(): + data = yaml.safe_load(p.read_text()) + matrix_entries.append(make_entry( + case_path, data.get("meta", {}), + data.get("resources", {}), resource_map_path, + )) + + elif args.changed_repos_list: + repos = 
json.loads(args.changed_repos_list) + for repo in repos: + for entry in list_test_resources(root, repo=repo): + matrix_entries.append(resource_entry_to_matrix(entry, repo=repo)) + + elif args.changed_repos: + info = json.loads(args.changed_repos) + if info.get("repo") == "_none_": + matrix_entries.append(make_empty_entry(repo="_none_")) + else: + repo = info["repo"] + task = info.get("task", "") or None + model = info.get("model", "") or None + entries = list_test_resources(root, repo=repo, task=task, model=model) + if entries: + for entry in entries: + matrix_entries.append(resource_entry_to_matrix( + entry, repo=repo, + task=info.get("task", ""), + model=info.get("model", ""), + )) + else: + matrix_entries.append(make_empty_entry(repo=repo)) + + matrix = {"include": matrix_entries} + matrix_json = json.dumps(matrix) + print(f"Matrix: {matrix_json}") + + # Write to GITHUB_OUTPUT if available + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + with open(github_output, "a") as f: + f.write(f"matrix={matrix_json}\n") + else: + # For local testing, just print to stdout + print(json.dumps(matrix, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/run_user_tests.py b/flagos-user-tests/tools/run_user_tests.py new file mode 100644 index 0000000..7e15dd7 --- /dev/null +++ b/flagos-user-tests/tools/run_user_tests.py @@ -0,0 +1,547 @@ +#!/usr/bin/env python3 +"""Run user-submitted test cases against FlagOS repositories. + +Each test case is a self-contained YAML config that defines: + - setup: how to install the repo and dependencies (user's perspective) + - run: how to execute the test (user's perspective) + - verify: how to check results against gold values + +This runner simply executes user-defined commands — it does NOT call +any internal repo test scripts. This keeps test cases at the "user level". 
+ +Usage: + # Run a specific test case + python tools/run_user_tests.py --case tests/flagscale/train/mixtral/tp2_pp1_ep2.yaml + + # Run all test cases for a repo + python tools/run_user_tests.py --repo flagscale + + # Run all test cases for a repo+task+model + python tools/run_user_tests.py --repo flagscale --task train --model mixtral +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import yaml + +# --------------------------------------------------------------------------- +# Gold-value comparison +# --------------------------------------------------------------------------- + +def extract_metrics_from_lines(lines: list[str], metric_keys: list[str]) -> dict: + """Extract numeric metric values from log lines. + + Supports common log formats: + - Pipe-separated: "iteration 1/10 | lm loss: 1.161E+01 | ..." + - Key-value: "step 1 metric_name:1.234" + """ + results = {k: [] for k in metric_keys} + + for line in lines: + for key in metric_keys: + # Pattern: "key " or "key: " + # Handle keys with or without trailing colon + escaped = re.escape(key.rstrip(":")) + pattern = rf"{escaped}\s*:?\s*([+-]?\d+\.?\d*(?:[eE][+-]?\d+)?)" + match = re.search(pattern, line) + if match: + try: + results[key].append(float(match.group(1))) + except ValueError: + pass + + return results + + +def extract_text_from_lines(lines: list[str], pattern: str) -> list[str]: + """Extract text values from log lines using a regex pattern. + + The pattern must contain at least one capture group. If multiple groups + are present (e.g. alternation), the first non-None group is used. 
+ Example pattern: r"output\\.outputs\\[0\\]\\.text=(?:\"(.+?)\"|'(.+?)')" + """ + results = [] + compiled = re.compile(pattern) + + for line in lines: + match = compiled.search(line) + if match: + # Pick first non-None group + val = next((g for g in match.groups() if g is not None), None) + if val is not None: + results.append(val) + + return results + + +def compare_gold_values( + actual: dict, gold: dict, rtol: float = 1e-5, atol: float = 0 +) -> tuple[bool, list[str]]: + """Compare actual metrics against gold values. + + Supports two types of gold entries: + - numeric (default): {"values": [1.0, 2.0], "type": "numeric"} + - text: {"values": ["hello", "world"], "type": "text", + "pattern": "regex with (capture group)"} + + Returns (all_passed, list_of_messages). + """ + messages = [] + all_passed = True + + for key, gold_entry in gold.items(): + gold_values = gold_entry.get("values", []) + actual_values = actual.get(key, []) + entry_type = gold_entry.get("type", "numeric") + + if not actual_values: + messages.append(f"FAIL: No values extracted for metric '{key}'") + all_passed = False + continue + + if len(actual_values) != len(gold_values): + messages.append( + f"FAIL: Length mismatch for '{key}': " + f"got {len(actual_values)}, expected {len(gold_values)}" + ) + all_passed = False + continue + + if entry_type == "text": + for i, (a, g) in enumerate(zip(actual_values, gold_values)): + if a != g: + messages.append( + f"FAIL: '{key}'[{i}] text mismatch:\n" + f" actual: {a!r}\n" + f" gold: {g!r}" + ) + all_passed = False + break + else: + messages.append(f"PASS: '{key}' ({len(gold_values)} text values match)") + else: + # numeric comparison — numpy-free allclose + for i, (a, g) in enumerate(zip(actual_values, gold_values)): + if abs(a - g) > atol + rtol * abs(g): + messages.append( + f"FAIL: '{key}'[{i}] mismatch: actual={a}, gold={g}, " + f"diff={abs(a-g):.6e}" + ) + all_passed = False + break + else: + messages.append(f"PASS: '{key}' ({len(gold_values)} values 
match)") + + return all_passed, messages + + +# --------------------------------------------------------------------------- +# Test case execution +# --------------------------------------------------------------------------- + +def run_commands(cmds: list[str], cwd: str, env: dict | None = None) -> int: + """Run a list of shell commands sequentially. Return first non-zero exit code.""" + full_env = {**os.environ, **(env or {})} + for cmd in cmds: + print(f" $ {cmd}") + result = subprocess.run(cmd, shell=True, cwd=cwd, env=full_env) + if result.returncode != 0: + print(f" FAILED (exit code {result.returncode})") + return result.returncode + return 0 + + +def run_test_case(case_path: Path, workdir: Path | None = None) -> int: + """Execute a single user test case. + + Test case YAML format: + meta: + repo: flagscale + task: train + model: mixtral + description: "..." + + resources: + platform: cuda + device: A100-40GB + device_count: 1 + + setup: + - pip install flagscale + - modelscope download --model ... --local_dir ./model_weights + + run: + - flagscale train mixtral --config ./conf/tp2_pp1_ep2.yaml + + verify: + log_path: "tests/functional_tests/train/mixtral/test_results/tp2_pp1_ep2/logs/..." + gold_values_path: "./gold_values/tp2_pp1_ep2.json" + # OR inline gold values: + gold_values: + "lm loss:": + values: [11.17587, 11.16908, ...] 
+ rtol: 1e-5 + atol: 0 + """ + print(f"\n{'='*60}") + print(f"Test Case: {case_path}") + print(f"{'='*60}") + + with open(case_path) as f: + config = yaml.safe_load(f) + + meta = config.get("meta", {}) + setup_cmds = config.get("setup", []) + run_cmds = config.get("run", []) + verify_config = config.get("verify", {}) + + print(f"Repo: {meta.get('repo', 'unknown')}") + print(f"Task: {meta.get('task', 'unknown')}") + print(f"Model: {meta.get('model', 'unknown')}") + print(f"Desc: {meta.get('description', '')}") + print() + + # Determine working directory — test case files live next to the YAML + case_dir = case_path.parent.resolve() + cwd = str(workdir.resolve()) if workdir else str(case_dir) + + env = config.get("env", {}) + # Convert all env values to strings + env = {k: str(v) for k, v in env.items()} + + # --- Setup --- + if setup_cmds: + print("--- Setup ---") + rc = run_commands(setup_cmds, cwd=cwd, env=env) + if rc != 0: + print("SETUP FAILED") + return rc + + # --- Run --- + if run_cmds: + print("\n--- Run ---") + rc = run_commands(run_cmds, cwd=cwd, env=env) + if rc != 0: + print("RUN FAILED") + return rc + + # --- Verify --- + if verify_config: + print("\n--- Verify ---") + return verify_results(verify_config, case_dir=case_dir, cwd=cwd) + + print("\nPASSED (no verify step)") + return 0 + + +def verify_results(verify_config: dict, case_dir: Path, cwd: str) -> int: + """Verify test results against gold values.""" + # Load gold values + gold = verify_config.get("gold_values") + if not gold: + gold_path = verify_config.get("gold_values_path", "") + if gold_path: + # Resolve relative to case_dir + full_path = (case_dir / gold_path) if not Path(gold_path).is_absolute() else Path(gold_path) + if not full_path.exists(): + # Also try relative to cwd + full_path = Path(cwd) / gold_path + if not full_path.exists(): + print(f"FAIL: Gold values file not found: {gold_path}") + return 1 + with open(full_path) as f: + gold = json.load(f) + else: + print("No gold values 
defined, skipping verification") + return 0 + + # Extract actual metrics from log + log_path = verify_config.get("log_path", "") + if not log_path: + print("FAIL: verify.log_path is required for gold value comparison") + return 1 + + # Resolve log path — try relative to cwd first, then case_dir + full_log = Path(cwd) / log_path + if not full_log.exists(): + full_log = case_dir / log_path + if not full_log.exists(): + # Try glob pattern (user might use * for timestamp dirs) + import glob as globmod + candidates = globmod.glob(str(Path(cwd) / log_path)) + if not candidates: + candidates = globmod.glob(str(case_dir / log_path)) + if candidates: + full_log = Path(sorted(candidates)[-1]) # latest match + else: + print(f"FAIL: Log file not found: {log_path}") + return 1 + + print(f"Log: {full_log}") + + # Read log via subprocess to bypass NFS client cache + import time + time.sleep(2) + log_content = subprocess.run( + ["cat", str(full_log)], capture_output=True, text=True + ).stdout + log_lines = log_content.splitlines() + + # Separate numeric and text gold entries + numeric_keys = [] + actual = {} + for key, entry in gold.items(): + entry_type = entry.get("type", "numeric") + if entry_type == "text": + pattern = entry.get("pattern", "") + if not pattern: + print(f"FAIL: Text gold entry '{key}' requires a 'pattern' field") + return 1 + actual[key] = extract_text_from_lines(log_lines, pattern) + else: + numeric_keys.append(key) + + if numeric_keys: + numeric_actual = extract_metrics_from_lines(log_lines, numeric_keys) + actual.update(numeric_actual) + + rtol = verify_config.get("rtol", 1e-5) + atol = verify_config.get("atol", 0) + passed, messages = compare_gold_values(actual, gold, rtol=rtol, atol=atol) + + for msg in messages: + print(f" {msg}") + + print(f"\nResult: {'PASSED' if passed else 'FAILED'}") + return 0 if passed else 1 + + +# --------------------------------------------------------------------------- +# Discovery and batch execution +# 
--------------------------------------------------------------------------- + +def discover_test_cases( + root: Path, repo: str | None = None, + task: str | None = None, model: str | None = None +) -> list[Path]: + """Find all test case YAML files under tests/. + + Test case YAMLs are identified by having a 'meta' key with 'repo'. + """ + tests_dir = root / "tests" + cases = [] + + for yaml_path in sorted(tests_dir.rglob("*.yaml")): + # Skip files in sub-config dirs (train/, data.yaml, etc.) + if yaml_path.name.startswith("_") or yaml_path.name == "data.yaml": + continue + + try: + with open(yaml_path) as f: + data = yaml.safe_load(f) + if not isinstance(data, dict) or "meta" not in data: + continue + meta = data["meta"] + if repo and meta.get("repo") != repo: + continue + if task and meta.get("task") != task: + continue + if model and meta.get("model") != model: + continue + cases.append(yaml_path) + except (yaml.YAMLError, KeyError): + continue + + return cases + + +def _load_resource_map(resource_map_path: Path) -> dict: + """Load resource_map.yaml, returning empty dict on failure.""" + if not resource_map_path.exists(): + return {} + with open(resource_map_path) as f: + return yaml.safe_load(f) or {} + + +def _get_platform_config(resource_map: dict, platform: str) -> dict: + """Get platform config from resource_map, with fallback to default_platform.""" + platforms = resource_map.get("platforms", {}) + if platform and platform in platforms: + return platforms[platform] + default_platform = resource_map.get("default_platform", "") + if default_platform and default_platform in platforms: + return platforms[default_platform] + return {} + + +def resolve_runner_labels(resources: dict, resource_map_path: Path) -> list[str]: + """Resolve test case resources to GitHub Actions runner labels. + + Uses platform-based lookup: + resources.platform -> platforms..device_labels[resources.device] + + Falls back to platform default_labels, then global default_labels. 
+ """ + global_default = ["self-hosted"] + resource_map = _load_resource_map(resource_map_path) + if not resource_map: + return global_default + + global_default = resource_map.get("default_labels", global_default) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + if not pcfg: + return global_default + + platform_default = pcfg.get("default_labels", global_default) + device = resources.get("device", "") + if not device: + return platform_default + + # Case-insensitive device lookup + device_labels = pcfg.get("device_labels", {}) + for key, labels in device_labels.items(): + if key.lower() == device.lower(): + return labels + + return platform_default + + +def resolve_container_image( + repo: str, task: str, resources: dict, resource_map_path: Path +) -> str: + """Resolve test case to a Docker container image. + + Lookup: platform -> container_images -> "/" | "" | "default" + Returns "" if no image is configured. + """ + resource_map = _load_resource_map(resource_map_path) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + images = pcfg.get("container_images", {}) + if not images: + return "" + + key = f"{repo}/{task}" if task else repo + image = images.get(key, "") + if not image and repo: + image = images.get(repo, "") + if not image: + image = images.get("default", "") + return image + + +def resolve_container_options(resources: dict, resource_map_path: Path) -> dict: + """Resolve container runtime options and volumes for the given platform. + + Returns {"container_options": str, "container_volumes": list}. 
+ """ + resource_map = _load_resource_map(resource_map_path) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + return { + "container_options": pcfg.get("container_options", ""), + "container_volumes": pcfg.get("container_volumes", []), + } + + +def list_test_resources( + root: Path, repo: str | None = None, + task: str | None = None, model: str | None = None +) -> list[dict]: + """List test cases with their resource requirements, runner labels, and container config. + + Returns a list of dicts with keys: + case_path, resources, runner_labels, container_image, container_options, container_volumes + """ + cases = discover_test_cases(root, repo, task, model) + resource_map_path = root / "resource_map.yaml" + result = [] + + for case_path in cases: + with open(case_path) as f: + data = yaml.safe_load(f) + meta = data.get("meta", {}) + resources = data.get("resources", {}) + runner_labels = resolve_runner_labels(resources, resource_map_path) + container_image = resolve_container_image( + meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path + ) + container_opts = resolve_container_options(resources, resource_map_path) + result.append({ + "case_path": str(case_path), + "resources": resources, + "runner_labels": runner_labels, + "container_image": container_image, + **container_opts, + }) + + return result + + +def main(): + parser = argparse.ArgumentParser( + description="Run user-submitted FlagOS test cases" + ) + parser.add_argument("--case", help="Path to a specific test case YAML") + parser.add_argument("--repo", help="Run all cases for this repo") + parser.add_argument("--task", help="Filter by task type") + parser.add_argument("--model", help="Filter by model name") + parser.add_argument( + "--workdir", + help="Working directory for command execution (default: test case directory)" + ) + parser.add_argument( + "--list-resources", action="store_true", + help="List test cases with resource 
requirements and runner labels (JSON output)" + ) + args = parser.parse_args() + + # --list-resources mode: output JSON and exit + if args.list_resources: + root = Path(".") + result = list_test_resources(root, args.repo, args.task, args.model) + print(json.dumps(result, indent=2)) + sys.exit(0) + + workdir = Path(args.workdir) if args.workdir else None + + if args.case: + case_path = Path(args.case) + if not case_path.exists(): + print(f"ERROR: Test case not found: {case_path}") + sys.exit(1) + sys.exit(run_test_case(case_path, workdir)) + + if not args.repo: + print("ERROR: Specify --case, --repo, or --list-resources") + sys.exit(1) + + root = Path(".") + cases = discover_test_cases(root, args.repo, args.task, args.model) + + if not cases: + print(f"No test cases found for repo={args.repo} task={args.task} model={args.model}") + sys.exit(0) + + print(f"Found {len(cases)} test case(s)") + failed = 0 + for case in cases: + rc = run_test_case(case, workdir) + if rc != 0: + failed += 1 + + print(f"\n{'='*60}") + print(f"Results: {len(cases) - failed}/{len(cases)} passed") + print(f"{'='*60}") + sys.exit(1 if failed else 0) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/validators/lint_test_case.py b/flagos-user-tests/tools/validators/lint_test_case.py new file mode 100644 index 0000000..e171dde --- /dev/null +++ b/flagos-user-tests/tools/validators/lint_test_case.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +"""Lint test case directories for completeness and correctness. + +Checks: +- Each test case directory has a README.md +- Each test case has at least one YAML config +- README contains required sections (Description, Environment, etc.) 
VALID_REPOS = [
    "flagscale", "flaggems", "flagcx", "flagtree",
    "vllm-fl", "vllm-plugin-fl", "te-fl", "megatron-lm-fl",
]

# Patterns that might indicate sensitive data in configs
SENSITIVE_PATTERNS = [
    re.compile(r"(password|passwd|secret|token|api_key)\s*[:=]", re.IGNORECASE),
    re.compile(r"/home/[a-zA-Z0-9_]+/", re.IGNORECASE),  # Private user paths
    re.compile(r"sk-[a-zA-Z0-9]{20,}"),  # API keys
]

README_REQUIRED_SECTIONS = ["description", "environment"]


def find_test_case_dirs(root: Path) -> list[Path]:
    """Return the sorted set of directories that hold a user-perspective
    test case YAML (i.e. a YAML whose top-level mapping has a 'meta' key)."""
    tests_dir = root / "tests"
    if not tests_dir.exists():
        return []

    found: set[Path] = set()
    for candidate in tests_dir.rglob("*.yaml"):
        try:
            parsed = yaml.safe_load(candidate.read_text())
        except (yaml.YAMLError, OSError):
            continue
        if isinstance(parsed, dict) and "meta" in parsed:
            found.add(candidate.parent)

    return sorted(found)


def lint_readme(readme_path: Path, strict: bool = False) -> list[str]:
    """Check README.md for required content; return a list of problem strings."""
    if not readme_path.exists():
        return [f"{readme_path.parent}: Missing README.md"]

    text = readme_path.read_text().lower()
    problems: list[str] = []

    # Section checks only apply in strict mode.
    if strict:
        problems.extend(
            f"{readme_path}: Missing required section '{section}'"
            for section in README_REQUIRED_SECTIONS
            if section not in text
        )

    if len(text.strip()) < 20:
        problems.append(f"{readme_path}: README is too short (less than 20 characters)")

    return problems


def lint_sensitive_data(filepath: Path) -> list[str]:
    """Scan one file for patterns that look like credentials or private paths."""
    text = filepath.read_text()
    findings: list[str] = []
    for regex in SENSITIVE_PATTERNS:
        hits = regex.findall(text)
        if hits:
            findings.append(
                f"{filepath}: Possible sensitive data detected: {hits[:3]}"
            )
    return findings


def lint_yaml_configs(test_dir: Path) -> list[str]:
    """Parse-check every *.yaml directly in a test directory and scan each for secrets."""
    configs = list(test_dir.glob("*.yaml"))
    if not configs:
        return []

    problems: list[str] = []
    for cfg in configs:
        try:
            with open(cfg) as handle:
                parsed = yaml.safe_load(handle)
            if parsed is None:
                problems.append(f"{cfg}: Empty YAML file")
        except yaml.YAMLError as exc:
            problems.append(f"{cfg}: Invalid YAML - {exc}")
            continue

        # Check for sensitive data
        problems.extend(lint_sensitive_data(cfg))

    return problems


def main():
    """CLI entry point: lint every discovered test case directory."""
    parser = argparse.ArgumentParser(description="Lint test case directories")
    parser.add_argument(
        "--path", default=".",
        help="Root directory of flagos-user-tests"
    )
    parser.add_argument(
        "--strict", action="store_true",
        help="Enable strict checks (README sections, etc.)"
    )
    args = parser.parse_args()

    test_dirs = find_test_case_dirs(Path(args.path))
    if not test_dirs:
        print("No test case directories found.")
        sys.exit(0)

    hard_errors: list[str] = []
    soft_warnings: list[str] = []
    for directory in test_dirs:
        readme_issues = lint_readme(directory / "README.md", strict=args.strict)
        # README problems are fatal only in strict mode; otherwise warn.
        (hard_errors if args.strict else soft_warnings).extend(readme_issues)
        hard_errors.extend(lint_yaml_configs(directory))

    if soft_warnings:
        print(f"Warnings ({len(soft_warnings)}):")
        for item in soft_warnings:
            print(f"  ⚠ {item}")

    if hard_errors:
        print(f"Lint FAILED with {len(hard_errors)} error(s):")
        for item in hard_errors:
            print(f"  ✗ {item}")
        sys.exit(1)
    else:
        print(f"Lint PASSED: {len(test_dirs)} test directory(ies) checked.")
        sys.exit(0)


if __name__ == "__main__":
    main()
VALID_REPOS = [
    "flagscale", "flaggems", "flagcx", "flagtree",
    "vllm-fl", "vllm-plugin-fl", "te-fl", "megatron-lm-fl",
]


def validate_yaml_syntax(filepath: Path) -> list[str]:
    """Check that a file is valid, non-empty YAML; return a list of error strings."""
    errors = []
    try:
        with open(filepath) as f:
            data = yaml.safe_load(f)
        if data is None:
            errors.append(f"{filepath}: YAML file is empty")
    except yaml.YAMLError as e:
        errors.append(f"{filepath}: Invalid YAML syntax - {e}")
    return errors


def validate_test_case(filepath: Path, data: dict) -> list[str]:
    """Validate a user-perspective test case YAML (one that has a 'meta' key).

    Required: meta.repo (from VALID_REPOS) and a 'run' list. 'setup' must be
    a list when present; when gold values are defined under 'verify', a
    verify.log_path is required so the runner knows what to scan.
    """
    errors = []
    meta = data.get("meta", {})

    if not meta.get("repo"):
        errors.append(f"{filepath}: Missing 'meta.repo'")
    elif meta["repo"] not in VALID_REPOS:
        errors.append(f"{filepath}: Invalid meta.repo '{meta['repo']}'")

    if not data.get("run"):
        errors.append(f"{filepath}: Missing 'run' (list of commands)")
    elif not isinstance(data["run"], list):
        errors.append(f"{filepath}: 'run' must be a list of commands")

    if "setup" in data and not isinstance(data["setup"], list):
        errors.append(f"{filepath}: 'setup' must be a list of commands")

    if "verify" in data:
        v = data["verify"]
        if isinstance(v, dict):
            has_gold = v.get("gold_values") or v.get("gold_values_path")
            if has_gold and not v.get("log_path"):
                errors.append(f"{filepath}: verify.log_path required when gold values are defined")

    return errors


def validate_flagscale_subconfig(filepath: Path, data: dict) -> list[str]:
    """Validate FlagScale sub-config (experiment config or train params)."""
    errors = []
    keys = set(data.keys())

    if "experiment" in keys:
        exp = data["experiment"]
        # BUGFIX: a null/scalar 'experiment:' node previously raised
        # TypeError on the membership tests below; report it instead.
        if not isinstance(exp, dict):
            return [f"{filepath}: 'experiment' must be a mapping"]
        if "exp_name" not in exp:
            errors.append(f"{filepath}: Missing 'experiment.exp_name'")
        if "task" not in exp:
            errors.append(f"{filepath}: Missing 'experiment.task'")
        elif "type" not in (exp.get("task") or {}):
            # `or {}` guards an explicit `task:` with a null value.
            errors.append(f"{filepath}: Missing 'experiment.task.type'")
    elif "defaults" in keys:
        # Sub-config (train params, data, etc.) — lighter validation
        pass
    else:
        errors.append(
            f"{filepath}: Missing expected top-level key "
            f"('experiment' or 'defaults'), found: {keys}"
        )
    return errors


def validate_file(filepath: Path) -> list[str]:
    """Validate a single YAML file, dispatching on its content type."""
    errors = validate_yaml_syntax(filepath)
    if errors:
        return errors

    with open(filepath) as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict):
        return [f"{filepath}: Must be a YAML mapping"]

    # Determine type by content
    if "meta" in data:
        # User-perspective test case
        return validate_test_case(filepath, data)
    elif "experiment" in data or "defaults" in data:
        # FlagScale sub-config (Hydra config)
        return validate_flagscale_subconfig(filepath, data)
    else:
        # Generic config — just check it's a valid non-empty dict
        return []


def find_yaml_files(root: Path) -> list[Path]:
    """Find all YAML files under tests/."""
    tests_dir = root / "tests"
    if not tests_dir.exists():
        return []
    return sorted(tests_dir.rglob("*.yaml"))


def main():
    """CLI entry point: validate all (or only the changed) test YAML configs."""
    parser = argparse.ArgumentParser(description="Validate test case YAML configs")
    parser.add_argument("--path", default=".", help="Root directory of flagos-user-tests")
    parser.add_argument("--changed-files", default="", help="Comma-separated list of changed files")
    args = parser.parse_args()

    root = Path(args.path)

    if args.changed_files:
        # BUGFIX: strip each entry once and build the Path from the stripped
        # value — previously the filters used f.strip() but Path(f) kept the
        # surrounding whitespace, so " tests/x.yaml" was reported missing.
        stripped = (f.strip() for f in args.changed_files.split(","))
        yaml_files = [
            Path(f) for f in stripped
            if f.endswith(".yaml") and f.startswith("tests/")
        ]
    else:
        yaml_files = find_yaml_files(root)

    if not yaml_files:
        print("No YAML test config files found to validate.")
        sys.exit(0)

    all_errors = []
    for filepath in yaml_files:
        full_path = root / filepath if not filepath.is_absolute() else filepath
        if not full_path.exists():
            all_errors.append(f"{filepath}: File does not exist")
            continue
        all_errors.extend(validate_file(full_path))

    if all_errors:
        print(f"Validation FAILED with {len(all_errors)} error(s):")
        for err in all_errors:
            print(f"  ✗ {err}")
        sys.exit(1)
    else:
        print(f"Validation PASSED: {len(yaml_files)} file(s) checked.")
        sys.exit(0)


if __name__ == "__main__":
    main()
def validate_gold_values_file(filepath: Path) -> list[str]:
    """Validate a single gold values JSON file; return a list of error strings.

    Expected structure: each key maps to an object with a non-empty "values"
    array. Numeric entries (the default) must contain only real numbers;
    entries declared with "type": "text" must contain only strings and carry
    the extraction "pattern" the runner requires (see run_user_tests.py).
    """
    try:
        with open(filepath) as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        return [f"{filepath}: Invalid JSON - {e}"]

    if not isinstance(data, dict):
        return [f"{filepath}: Gold values must be a JSON object, got {type(data).__name__}"]

    if not data:
        return [f"{filepath}: Gold values file is empty"]

    errors = []
    for key, value in data.items():
        if not isinstance(value, dict):
            errors.append(f"{filepath}: Key '{key}' must map to an object, got {type(value).__name__}")
            continue

        if "values" not in value:
            errors.append(f"{filepath}: Key '{key}' missing 'values' field")
            continue

        values = value["values"]
        if not isinstance(values, list):
            errors.append(f"{filepath}: Key '{key}'.values must be an array")
            continue

        if len(values) == 0:
            errors.append(f"{filepath}: Key '{key}'.values is empty")
            continue

        entry_type = value.get("type", "numeric")
        if entry_type == "text":
            # CONSISTENCY FIX: the runner's compare_gold_values supports
            # "type": "text" entries with string values, but this validator
            # used to reject them as non-numeric. Text entries also need the
            # regex 'pattern' the runner uses for extraction.
            if not value.get("pattern"):
                errors.append(f"{filepath}: Key '{key}' of type 'text' missing 'pattern' field")
            for i, v in enumerate(values):
                if not isinstance(v, str):
                    errors.append(
                        f"{filepath}: Key '{key}'.values[{i}] is not a string: {v!r}"
                    )
        else:
            for i, v in enumerate(values):
                # BUGFIX: bool is a subclass of int, so it must be excluded
                # explicitly — JSON true/false previously passed as numeric.
                if isinstance(v, bool) or not isinstance(v, (int, float)):
                    errors.append(
                        f"{filepath}: Key '{key}'.values[{i}] is not numeric: {v!r}"
                    )

    return errors


def find_gold_values_files(root: Path) -> list[Path]:
    """Find all gold values JSON files under tests/.

    Supports both conventions:
      - FlagScale: tests/<repo>/<task>/<model>/gold_values/<name>.json
      - Flat: tests/<repo>/<task>/<name>_gold_values.json

    Returns a sorted, de-duplicated list so validation output is
    deterministic (the previous list(set(...)) order was not).
    """
    tests_dir = root / "tests"
    if not tests_dir.exists():
        return []
    # Match files inside gold_values/ directories
    gold_dir_files = tests_dir.rglob("gold_values/*.json")
    # Match files with _gold_values in name (legacy flat layout)
    gold_name_files = tests_dir.rglob("*_gold_values.json")
    return sorted(set(gold_dir_files) | set(gold_name_files))


def main():
    """CLI entry point: validate every discovered gold values file."""
    parser = argparse.ArgumentParser(description="Validate gold values JSON files")
    parser.add_argument(
        "--path", default=".",
        help="Root directory of flagos-user-tests"
    )
    args = parser.parse_args()

    root = Path(args.path)
    gold_files = find_gold_values_files(root)

    if not gold_files:
        print("No gold values files found. Skipping validation.")
        sys.exit(0)

    all_errors = []
    for filepath in gold_files:
        all_errors.extend(validate_gold_values_file(filepath))

    if all_errors:
        print(f"Gold values validation FAILED with {len(all_errors)} error(s):")
        for err in all_errors:
            print(f"  ✗ {err}")
        sys.exit(1)
    else:
        print(f"Gold values validation PASSED: {len(gold_files)} file(s) checked.")
        sys.exit(0)


if __name__ == "__main__":
    main()
@@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Container init + if: ${{ matrix.container_init != '' }} + shell: bash -l {0} + run: ${{ matrix.container_init }} + - name: Install runner dependencies run: pip install pyyaml diff --git a/flagos-user-tests/resource_map.yaml b/flagos-user-tests/resource_map.yaml index 6cc25a1..2ccaf07 100644 --- a/flagos-user-tests/resource_map.yaml +++ b/flagos-user-tests/resource_map.yaml @@ -42,6 +42,11 @@ platforms: flagscale/inference: "localhost:5000/flagscale-inference:dev-cu128-py3.12-20260302102033" flagscale/hetero_train: "localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721" + # Container init commands: run inside the container before test execution + # Same key format as container_images: "/" | "" | "default" + container_init: + flagscale/inference: "conda activate flagscale-inference" + # Container runtime options container_options: "--gpus all --shm-size=500g --user root --ulimit nofile=65535:65535" diff --git a/flagos-user-tests/tools/resolve_matrix.py b/flagos-user-tests/tools/resolve_matrix.py index c13c8f0..0ade247 100644 --- a/flagos-user-tests/tools/resolve_matrix.py +++ b/flagos-user-tests/tools/resolve_matrix.py @@ -22,6 +22,7 @@ from run_user_tests import ( list_test_resources, resolve_container_image, + resolve_container_init, resolve_container_options, resolve_runner_labels, ) @@ -36,6 +37,10 @@ def make_entry(case_path: str, meta: dict, resources: dict, resource_map_path: P meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path, ) + init_cmd = resolve_container_init( + meta.get("repo", ""), meta.get("task", ""), + resources, resource_map_path, + ) opts = resolve_container_options(resources, resource_map_path) return { "case_path": case_path, @@ -44,6 +49,7 @@ def make_entry(case_path: str, meta: dict, resources: dict, resource_map_path: P "model": meta.get("model", ""), "runner_labels": json.dumps(labels), "container_image": image, + "container_init": init_cmd, 
"container_options": opts["container_options"], "container_volumes": json.dumps(opts["container_volumes"]), } @@ -54,7 +60,8 @@ def make_empty_entry(**kwargs) -> dict: return { "case_path": "", "repo": "", "task": "", "model": "", "runner_labels": json.dumps(["self-hosted"]), - "container_image": "", "container_options": "", + "container_image": "", "container_init": "", + "container_options": "", "container_volumes": json.dumps([]), **kwargs, } @@ -67,6 +74,7 @@ def resource_entry_to_matrix(entry: dict, repo: str = "", task: str = "", model: "repo": repo or "", "task": task or "", "model": model or "", "runner_labels": json.dumps(entry["runner_labels"]), "container_image": entry.get("container_image", ""), + "container_init": entry.get("container_init", ""), "container_options": entry.get("container_options", ""), "container_volumes": json.dumps(entry.get("container_volumes", [])), } diff --git a/flagos-user-tests/tools/run_user_tests.py b/flagos-user-tests/tools/run_user_tests.py index 7e15dd7..36a53a1 100644 --- a/flagos-user-tests/tools/run_user_tests.py +++ b/flagos-user-tests/tools/run_user_tests.py @@ -451,6 +451,30 @@ def resolve_container_options(resources: dict, resource_map_path: Path) -> dict: } +def resolve_container_init( + repo: str, task: str, resources: dict, resource_map_path: Path +) -> str: + """Resolve container init command for the given platform and repo/task. + + Lookup: platform -> container_init -> "/" | "" | "default" + Returns "" if no init command is configured. 
+ """ + resource_map = _load_resource_map(resource_map_path) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + init_cmds = pcfg.get("container_init", {}) + if not init_cmds: + return "" + + key = f"{repo}/{task}" if task else repo + cmd = init_cmds.get(key, "") + if not cmd and repo: + cmd = init_cmds.get(repo, "") + if not cmd: + cmd = init_cmds.get("default", "") + return cmd + + def list_test_resources( root: Path, repo: str | None = None, task: str | None = None, model: str | None = None @@ -458,7 +482,8 @@ def list_test_resources( """List test cases with their resource requirements, runner labels, and container config. Returns a list of dicts with keys: - case_path, resources, runner_labels, container_image, container_options, container_volumes + case_path, resources, runner_labels, container_image, container_init, + container_options, container_volumes """ cases = discover_test_cases(root, repo, task, model) resource_map_path = root / "resource_map.yaml" @@ -473,12 +498,16 @@ def list_test_resources( container_image = resolve_container_image( meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path ) + container_init = resolve_container_init( + meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path + ) container_opts = resolve_container_options(resources, resource_map_path) result.append({ "case_path": str(case_path), "resources": resources, "runner_labels": runner_labels, "container_image": container_image, + "container_init": container_init, **container_opts, }) diff --git a/flagos-user-tests/tools/validators/validate_gold_values.py b/flagos-user-tests/tools/validators/validate_gold_values.py index 86ab52a..6d690cc 100644 --- a/flagos-user-tests/tools/validators/validate_gold_values.py +++ b/flagos-user-tests/tools/validators/validate_gold_values.py @@ -4,8 +4,9 @@ Checks: - Valid JSON syntax - Expected structure: keys map to objects with "values" arrays -- All values are 
numeric - At least one value is present +- Numeric entries (default): all values are int/float +- Text entries (type: "text"): all values are strings, "pattern" field is present """ import argparse @@ -48,11 +49,25 @@ def validate_gold_values_file(filepath: Path) -> list[str]: errors.append(f"{filepath}: Key '{key}'.values is empty") continue - for i, v in enumerate(values): - if not isinstance(v, (int, float)): - errors.append( - f"{filepath}: Key '{key}'.values[{i}] is not numeric: {v!r}" - ) + entry_type = value.get("type", "numeric") + + if entry_type == "text": + # Text entries require a 'pattern' field for extraction + if "pattern" not in value: + errors.append(f"{filepath}: Key '{key}' has type 'text' but missing 'pattern' field") + for i, v in enumerate(values): + if not isinstance(v, str): + errors.append( + f"{filepath}: Key '{key}'.values[{i}] is not a string: {v!r}" + ) + elif entry_type == "numeric": + for i, v in enumerate(values): + if not isinstance(v, (int, float)): + errors.append( + f"{filepath}: Key '{key}'.values[{i}] is not numeric: {v!r}" + ) + else: + errors.append(f"{filepath}: Key '{key}' has unknown type: {entry_type!r}") return errors From d0fce11ba94427cf8fd68ef6e13db57588120e61 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 14:38:32 +0800 Subject: [PATCH 03/13] fix conda env --- .github/workflows/test_dispatch.yml | 14 +++--- flagos-user-tests/resource_map.yaml | 6 +-- flagos-user-tests/tools/activate_conda.sh | 58 +++++++++++++++++++++++ flagos-user-tests/tools/resolve_matrix.py | 10 ++-- flagos-user-tests/tools/run_user_tests.py | 28 +++++------ 5 files changed, 88 insertions(+), 28 deletions(-) create mode 100755 flagos-user-tests/tools/activate_conda.sh diff --git a/.github/workflows/test_dispatch.yml b/.github/workflows/test_dispatch.yml index a7a5be1..25245c8 100644 --- a/.github/workflows/test_dispatch.yml +++ b/.github/workflows/test_dispatch.yml @@ -88,16 +88,18 @@ jobs: - name: Checkout uses: 
actions/checkout@v4 - - name: Container init - if: ${{ matrix.container_init != '' }} - shell: bash -l {0} - run: ${{ matrix.container_init }} - - name: Install runner dependencies - run: pip install pyyaml + run: | + if [ -n "${{ matrix.conda_env }}" ]; then + source flagos-user-tests/tools/activate_conda.sh ${{ matrix.conda_env }} + fi + pip install pyyaml - name: Run user tests run: | + if [ -n "${{ matrix.conda_env }}" ]; then + source flagos-user-tests/tools/activate_conda.sh ${{ matrix.conda_env }} + fi ARGS="" if [ -n "${{ matrix.case_path }}" ]; then ARGS="--case ${{ matrix.case_path }}" diff --git a/flagos-user-tests/resource_map.yaml b/flagos-user-tests/resource_map.yaml index 2ccaf07..30f5f75 100644 --- a/flagos-user-tests/resource_map.yaml +++ b/flagos-user-tests/resource_map.yaml @@ -42,10 +42,10 @@ platforms: flagscale/inference: "localhost:5000/flagscale-inference:dev-cu128-py3.12-20260302102033" flagscale/hetero_train: "localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721" - # Container init commands: run inside the container before test execution + # Conda environment to activate inside the container before test execution. # Same key format as container_images: "/" | "" | "default" - container_init: - flagscale/inference: "conda activate flagscale-inference" + conda_env: + flagscale/inference: "flagscale-inference" # Container runtime options container_options: "--gpus all --shm-size=500g --user root --ulimit nofile=65535:65535" diff --git a/flagos-user-tests/tools/activate_conda.sh b/flagos-user-tests/tools/activate_conda.sh new file mode 100755 index 0000000..a8e11bd --- /dev/null +++ b/flagos-user-tests/tools/activate_conda.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Activate a conda environment inside a container. +# +# Detects conda installation, initializes the shell, then activates the env. 
+# Must be sourced (not executed) so the activation persists in the caller's shell: +# source tools/activate_conda.sh [conda_path] +# +# Arguments: +# env_name — conda environment name (required) +# conda_path — path to conda installation (optional, auto-detected if omitted) + +set -e + +_activate_conda() { + local env_name="${1:?Usage: source activate_conda.sh [conda_path]}" + local conda_path="${2:-}" + + # Auto-detect conda path if not provided + if [ -z "$conda_path" ]; then + if [ -n "$CONDA_DIR" ] && [ -d "$CONDA_DIR" ]; then + conda_path="$CONDA_DIR" + elif command -v conda &>/dev/null; then + conda_path="$(conda info --base 2>/dev/null)" + elif [ -d "$HOME/miniconda3" ]; then + conda_path="$HOME/miniconda3" + elif [ -d "$HOME/anaconda3" ]; then + conda_path="$HOME/anaconda3" + elif [ -d "/opt/conda" ]; then + conda_path="/opt/conda" + fi + fi + + if [ -z "$conda_path" ]; then + echo "[activate_conda] WARNING: conda not found, skipping activation" + return 0 + fi + + local conda_sh="$conda_path/etc/profile.d/conda.sh" + if [ ! 
-f "$conda_sh" ]; then + echo "[activate_conda] ERROR: conda.sh not found at $conda_sh" + return 1 + fi + + # Initialize conda for this shell + echo "[activate_conda] Initializing conda from $conda_path" + source "$conda_sh" + + # Activate the environment + echo "[activate_conda] Activating environment: $env_name" + conda activate "$env_name" || { + echo "[activate_conda] ERROR: Failed to activate conda env '$env_name'" + return 1 + } + + echo "[activate_conda] Active Python: $(which python) ($(python --version 2>&1))" +} + +_activate_conda "$@" diff --git a/flagos-user-tests/tools/resolve_matrix.py b/flagos-user-tests/tools/resolve_matrix.py index 0ade247..1e8c9db 100644 --- a/flagos-user-tests/tools/resolve_matrix.py +++ b/flagos-user-tests/tools/resolve_matrix.py @@ -21,8 +21,8 @@ sys.path.insert(0, str(Path(__file__).parent)) from run_user_tests import ( list_test_resources, + resolve_conda_env, resolve_container_image, - resolve_container_init, resolve_container_options, resolve_runner_labels, ) @@ -37,7 +37,7 @@ def make_entry(case_path: str, meta: dict, resources: dict, resource_map_path: P meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path, ) - init_cmd = resolve_container_init( + init_cmd = resolve_conda_env( meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path, ) @@ -49,7 +49,7 @@ def make_entry(case_path: str, meta: dict, resources: dict, resource_map_path: P "model": meta.get("model", ""), "runner_labels": json.dumps(labels), "container_image": image, - "container_init": init_cmd, + "conda_env": init_cmd, "container_options": opts["container_options"], "container_volumes": json.dumps(opts["container_volumes"]), } @@ -60,7 +60,7 @@ def make_empty_entry(**kwargs) -> dict: return { "case_path": "", "repo": "", "task": "", "model": "", "runner_labels": json.dumps(["self-hosted"]), - "container_image": "", "container_init": "", + "container_image": "", "conda_env": "", "container_options": "", "container_volumes": 
json.dumps([]), **kwargs, @@ -74,7 +74,7 @@ def resource_entry_to_matrix(entry: dict, repo: str = "", task: str = "", model: "repo": repo or "", "task": task or "", "model": model or "", "runner_labels": json.dumps(entry["runner_labels"]), "container_image": entry.get("container_image", ""), - "container_init": entry.get("container_init", ""), + "conda_env": entry.get("conda_env", ""), "container_options": entry.get("container_options", ""), "container_volumes": json.dumps(entry.get("container_volumes", [])), } diff --git a/flagos-user-tests/tools/run_user_tests.py b/flagos-user-tests/tools/run_user_tests.py index 36a53a1..ac7f534 100644 --- a/flagos-user-tests/tools/run_user_tests.py +++ b/flagos-user-tests/tools/run_user_tests.py @@ -451,28 +451,28 @@ def resolve_container_options(resources: dict, resource_map_path: Path) -> dict: } -def resolve_container_init( +def resolve_conda_env( repo: str, task: str, resources: dict, resource_map_path: Path ) -> str: - """Resolve container init command for the given platform and repo/task. + """Resolve conda environment name for the given platform and repo/task. - Lookup: platform -> container_init -> "/" | "" | "default" - Returns "" if no init command is configured. + Lookup: platform -> conda_env -> "/" | "" | "default" + Returns "" if no conda env is configured. 
""" resource_map = _load_resource_map(resource_map_path) platform = resources.get("platform", "") pcfg = _get_platform_config(resource_map, platform) - init_cmds = pcfg.get("container_init", {}) - if not init_cmds: + conda_envs = pcfg.get("conda_env", {}) + if not conda_envs: return "" key = f"{repo}/{task}" if task else repo - cmd = init_cmds.get(key, "") - if not cmd and repo: - cmd = init_cmds.get(repo, "") - if not cmd: - cmd = init_cmds.get("default", "") - return cmd + env = conda_envs.get(key, "") + if not env and repo: + env = conda_envs.get(repo, "") + if not env: + env = conda_envs.get("default", "") + return env def list_test_resources( @@ -498,7 +498,7 @@ def list_test_resources( container_image = resolve_container_image( meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path ) - container_init = resolve_container_init( + conda_env = resolve_conda_env( meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path ) container_opts = resolve_container_options(resources, resource_map_path) @@ -507,7 +507,7 @@ def list_test_resources( "resources": resources, "runner_labels": runner_labels, "container_image": container_image, - "container_init": container_init, + "conda_env": conda_env, **container_opts, }) From 4be9437ccb4d638a6007d14301c87a083391fd62 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 14:40:03 +0800 Subject: [PATCH 04/13] fix conda env --- .github/workflows/test_dispatch.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_dispatch.yml b/.github/workflows/test_dispatch.yml index 25245c8..4b39f0d 100644 --- a/.github/workflows/test_dispatch.yml +++ b/.github/workflows/test_dispatch.yml @@ -89,6 +89,7 @@ jobs: uses: actions/checkout@v4 - name: Install runner dependencies + shell: bash run: | if [ -n "${{ matrix.conda_env }}" ]; then source flagos-user-tests/tools/activate_conda.sh ${{ matrix.conda_env }} @@ -96,6 +97,7 @@ jobs: pip install pyyaml - name: Run user tests + 
shell: bash run: | if [ -n "${{ matrix.conda_env }}" ]; then source flagos-user-tests/tools/activate_conda.sh ${{ matrix.conda_env }} From e924f290cbda70d572504a95f1e3bef753441ff6 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 14:41:13 +0800 Subject: [PATCH 05/13] fix conda env --- .github/workflows/test_dispatch.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_dispatch.yml b/.github/workflows/test_dispatch.yml index 4b39f0d..50f8f86 100644 --- a/.github/workflows/test_dispatch.yml +++ b/.github/workflows/test_dispatch.yml @@ -92,7 +92,7 @@ jobs: shell: bash run: | if [ -n "${{ matrix.conda_env }}" ]; then - source flagos-user-tests/tools/activate_conda.sh ${{ matrix.conda_env }} + source tools/activate_conda.sh ${{ matrix.conda_env }} fi pip install pyyaml @@ -100,7 +100,7 @@ jobs: shell: bash run: | if [ -n "${{ matrix.conda_env }}" ]; then - source flagos-user-tests/tools/activate_conda.sh ${{ matrix.conda_env }} + source tools/activate_conda.sh ${{ matrix.conda_env }} fi ARGS="" if [ -n "${{ matrix.case_path }}" ]; then From 615b5668d572a190b61fb334e57140e97e631994 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 15:02:29 +0800 Subject: [PATCH 06/13] fix test cmd --- .../qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml | 2 +- .../flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml index d941f2b..f1ce909 100644 --- a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml @@ -1,5 +1,5 @@ llm: - model: /share/project/models/Qwen/Qwen3-0.6B + model: /home/gitlab-runner/data/Qwen3-0.6B trust_remote_code: true 
tensor_parallel_size: 1 pipeline_parallel_size: 1 diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml index 7f997c7..e266a48 100644 --- a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml @@ -23,15 +23,7 @@ setup: - git clone https://github.com/FlagOpen/FlagScale.git && cd FlagScale && pip install . run: - - flagscale inference qwen3 --config ./conf/demo_0_6b.yaml - - | - pid_file="./outputs/qwen3/inference_logs/pids/host_0_localhost.pid" - if [ -f "$pid_file" ]; then - pid=$(cat "$pid_file") - echo "Waiting for inference process $pid to complete..." - while kill -0 "$pid" 2>/dev/null; do sleep 2; done - echo "Inference process completed." - fi + - flagscale inference qwen3 --config ./conf/demo_0_6b.yaml --test verify: log_path: "./outputs/qwen3/inference_logs/host_0_localhost.output" From 39dc2081a71eb85129c1ed0430accb91daeaa633 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 15:06:15 +0800 Subject: [PATCH 07/13] fix test cmd --- .../inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml index f1ce909..c91ac04 100644 --- a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml @@ -1,5 +1,5 @@ llm: - model: /home/gitlab-runner/data/Qwen3-0.6B + model: Qwen/Qwen3-0.6B trust_remote_code: true tensor_parallel_size: 1 pipeline_parallel_size: 1 From e059d32a167c81bc04d9929bd656a825218fe64f Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 15:13:22 +0800 
Subject: [PATCH 08/13] short name --- .github/workflows/test_dispatch.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_dispatch.yml b/.github/workflows/test_dispatch.yml index 50f8f86..c5253bd 100644 --- a/.github/workflows/test_dispatch.yml +++ b/.github/workflows/test_dispatch.yml @@ -74,6 +74,7 @@ jobs: --changed-repos-list '${{ steps.detect.outputs.changed_repos_list }}' run-tests: + name: ${{ matrix.repo }}/${{ matrix.task }}/${{ matrix.model }} needs: detect-changes if: ${{ needs.detect-changes.outputs.matrix != '' && !contains(needs.detect-changes.outputs.matrix, '_none_') }} strategy: From f4ba4a372fa2305143751dd594bc5c764909ed94 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 15:23:49 +0800 Subject: [PATCH 09/13] fix resolve matrix --- .gitignore | 1 + .../__pycache__/run_user_tests.cpython-312.pyc | Bin 22466 -> 0 bytes flagos-user-tests/tools/resolve_matrix.py | 4 +++- flagos-user-tests/tools/run_user_tests.py | 3 +++ 4 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 .gitignore delete mode 100644 flagos-user-tests/tools/__pycache__/run_user_tests.cpython-312.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..71bc36f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +**pycache** \ No newline at end of file diff --git a/flagos-user-tests/tools/__pycache__/run_user_tests.cpython-312.pyc b/flagos-user-tests/tools/__pycache__/run_user_tests.cpython-312.pyc deleted file mode 100644 index 05fdd462b2ee932f1644c0ed136243d22786ae4d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 22466 zcmb_^Yj7LKncxiGPXZub1Rvsvq(p)uAyJZLi_v2xQWEvBWQ($6$&$Gc1|&g&06har zA_6ANR<;5rsuNjuE#Y&#hBnR_K6@`rZSKNcU2Wv#cGp+AAE3cR(1cf|tX;L6xTlak{7b>#v_*_xEc2+-|o}@YtLGbL^QRiuwpYD=|aVHfmsv-_TvLtMW}zD{GKpAm0r6b!Lj~W-U=0YlS~s)WO=}&k%J+T?(WI zv5#8Vx^L*O)U&%-hn&MY0iz4bY0^fh-w<_=n%H`P@&J@OYGOU`SIsl9UdZ#ZJ}BXb z94sxCX@Hja%oMbxhYdhZBjkwXnv__x62m^2q&BqxMnAg^%3z;$Y%5@DU>T@kJHd|Q 
z&<1G%wjI($IBC2frvq{tNo`n96~YR=-2r(`sx-v36VjUhMeVdn?d*hhwy<5_pss9- zwz9z+T@=NcG*ookyVA0tsTs|TCfGaOsHLcj(+Ospk8-{I^!3SDDivj!RFqFKkuV?S znebRRmVmF5@$lG%VTOxNCHYt?$;G04$YMDWj*M5KnHaz`d^A4V8%ZWoP!MokKKArE z20lk)V@zs1oMPDMC}i;c7KZ5s;MDX~KQo@Z#iWu9))bD%0WL~vV!{a);HRPqHkyb) zl}s15w3BD1q8vXJjih2Xqd|g*n@%WbqPL@w=~R@UAzj(B)XgXt8=X-KM#iI&8&ESp z9ZyxeK9-EL%*}9oI?7utm&T!!P+20%F??)tDn7$V)$!seirr(O%afDPe4hE+xmTGO zbUO*nKY!s8jB7k@2`6Ttx2Y(X2*<^)l7{jTE;g0oLky|rMl?FbSBDo`lp=je?Ai za?loBGGDBAgk+R@zYyYGsW5+I*JP57#{V^SK$j5#L%Ikb1Q8>3)$$u8m-9hd&^;4Q zjSKoXu3N!68BKAq$jFW83@;csSioE&0^6+em>~vqMLiy(ar4d4gVbG3L=&STnrnKB zn$=`9W7I2}J4PKftA#IWkh*po(sc-V?PZARQ&3WTCe)avw2XcN@N9lq`dz7%9L^dt zhSw=p^M+>DsKRL2T#}+P#*FS=sis*|#>CTiPME2THlxQaWdeJ&=}B4Tgp6szF2}1O ztCd@^Ii#sOypFn))&c%u7!w-)W;M5{b}A(wTT}2?cKLuEF#eT>EKnqyZ

x?bGXd+q2| zcGuMqyt})O_P-cCaZOGP9u58mCpKsjG+b0LM1d-&Amn&hhEsxpkA}I(xL^oR;XWzo zN8`zGO3;sS$>}M<@;vTaCpa$21PVLb~*5jiUur^0X& za0x*VLlqh4aRrcm;p=y&ck|<6F1mY)OHRNhygMG7nNE*SCwG%mVoKaRl6)^YR>=0U zdy73sXpD-JIwDRcKZ4UZLZ*;wgi3e7AO9Y_=BQ8VsMhXMUH5A@a<1&l3vKh$xf^q* zign!|IlWokuiV?R1D_fwe_$!L7|Uzlw%oH6_I=k;^6klNOwVZu}`j!<)mr4f_SsBiS$3AhJY_!49FK)>`zhQ2MNv>>g*y?xC%XsRWK}wFX8fAZgU1g5G^QDYtLfHdkjIfg!)O6}^n zd>F{Ott+(aXs|8QNy5&cxEDmE2_l1t)4dP=9`A(LS&GI3gt`V(G)re_7h1cIx|2Rh#{R8)oS> zA{b$Vor=Yy9G(S&4k}Fv1~O(mhueD4DDFPE`Z+hkn4}Uc=fSiFOw+;^%H#TkT?QYq z7yd1p&UFIh5%}Y;!)uPJ)KL!C+^Kbk>u!2Jo$D+)+UADH=1_JvEIAe(d0ol5BdZsz z&X4PTtNwxf6K@~7cj&DnCI7(vOQrfV*@3d7e*SA^r+=w#u`b8|!r5N2Bf&cf!9Rvt zDA;Htsp)WB&>>$a=(to8;$e74aC?;>7eokAr{GP98YCY;DRnE8f`uGSX9>IoZRctbhT zK_WEH>hFBHx|L+iGTE!rOmfzSpTYRND+`%NrRB zYbBCSETwE{DkjC8G09^lw+fi8kTlj-GUB#bd&ZWrV{2ifbZpUH8)>gSV};h$Wo)l& zSVzYGy5ip%h5j9Ef3-){NpQGo_l>Q8Loc_Hh81zg`Y-6wmCZd$Da;x6Ee*sf z;|BIlqsq0*>1p5)(y-=k43cr^-l7lcad5*pP^<^WfnvQXoSSN7eXr}@u*mhu;{-j- z{3}PwziFiMfJdLvjpCux5WWJWHUQfa?)DTErc$ z7X+r;DT!$mlb8)i+Qy^tc(M&p+)8qBwoQ!;i?;PhnB>A6!#3&2#JagCo~pNEsd1)j z!#Np*<`66wi8KRZHpr+WQ(+#Cx*p)C`P4{q6gV^9Tx-ut{Y#w@YRFxCV9vOOLamxJedUxc4Gur z1heauo~F_C|vz?FP^v(|FMwl^)WS@Qb| zno`5Ati9}QU3zKprTodl>5}(A*79pdbJ^y(dtv@Uaogbi;rk~(a21=+uG!9&1KYA^ zAnRjmU3Ru)ZF$R`f3dW^??Y>!1YfT2Ekp{>RVaF&f8F1hJ6!Syi@xr{t{+BLc9jmC zEIx4xdGXWqr&_JOu41AZTM5EPR&UPtp|y3rW#?Ob;V{5#JC(w$6n! 
z4?Tg3i`q6sS3Hy}K)8<1LVLkmi2Sg9`9^8)@nYXVap3tP^W|SUUjTV!TW9Tdth;>+ zFX#Fer(fyM>+-4hBFo#~9lbZZ%Iq(>_t$D&ao3TRmscY9{U5kKz^0y~nRTW;e=Hxq zH}G9k!JYdm3`k%%j0d#UMA|AqTOFjW^;@>}{*fYc@t4lw$}mkecU7LJDVuNMcG22e z)U|S;Sp;S;GReX!kKAGfZ8UK+qGnl;mm_S@KSN}Ricu*D#iy2L<9l>QbDQQtClI5) zM@!Pp2ue38MdQKJ867YHns+5eil(Z^=mheno1To0)nw?5_C4*p5`%`4&;(JQZvs)F7Bfr{Q=2 z6fo(NEb73B!W89LaZeHV5*un5VLu^iQ0`gCeHH%rS$NG+j~qR(wk>rpb{8EzdH0_M z3L1#a4gJdD&T7|9_66O-@WP2~e9hFf?rK`vwzw^?E$m+&UKw9=oyr=@R@Vb-^QyHu zPnWFQ*KMw%r>o$55bR$K_7}Upw9;3sKfGo;Qg#O(xI0$e9Us~{Dptzrsn{r+U0k(h z)OErc2Z}u-oUAw47`H(x$CX4@?p0VdOH@@NM+3>G^{24(EXdkr813{-GYO{&fTRUr;wcetFE#ZUE{S%VWDhy_& z#29Fmh>=IJrpQV#*}5Xy)kIZxmE8s2I$Zq%{Rvo9`c)>CD$9pvkABhaVX3AP`$xZG z0I6oMpcqEe6sX5QkRY0Gr5J4g$!QMG4t32_@o)-T*UvoIk ztC)D82M{NkuMyb!nb2+mJ#s4=8yio7oliytXPCNMz16{#j7E~N2(U>;&d_d*Y+)xu zw35+Lse(bP1UZRdXhm)L=yU>Xn@KPZiq^T!HaJ)XBhsm>u4p$P;&-d;5?Gyzlb0(A z?N%kK5riiAWJ1(RVo@|GUbqOujQV&{1{TE%^^mAgsguD3B3n(`Hm1V}5MwGz$Y4)- z@`|ro=;NYI8deE$LO*J;CUxT#}?R`R(i0Za}W)DdbXv^;o9sYlK($-5zRRh2e zfYd7Jkcfb-Ld!IVs!)(~iJ3TUyhz9iL{OwF!8o0`k${DjHeMpcgrX@#(#EIJ9NEu+ z?F1uf;~)?ZNSuOIMMZ+ht)=a~y}isZ)&(o5H>jzySt2868$NOAxo0G1E^V_QIP#GY zrY#qrJ1^%rNY3-5p&}+wup);8tDE3VH&{^KMUF55lDPVQ%bf)0(k zf|(DGMn{ljxcy`ZP$&dO5sgO4_)QR9!TQAQgDk?>33Vbuc~OEqdlBE!64EG z_htOHkf9wRQiF-`h$uhUB@7beA7b$Vd=aL$9t+Yp!2%Z9Qg*c?E~K9k>^PT%B6;}Z zKZOl_j{5WuXi9I%9?#y)j%Rz`rU$ z*t*M0I8VCdYRejatEDV~ih;7#-|e68Ul`3@2i9|LuQ!9iSEu&J8{6*mb8p zJ9alYpDfk2u34BLx|d)0iEVF0?Q2TeClz)s(IF1=qKp z1Ri}Mv}W60c6+m@5!|T;?<#1Q-D~zIHh}l6*;*BFFMt>8m^IV(bw^8Xa(Vpzz?x&I zXc~g?ELwJ~>2{W_w%=Z+VUFfo|M_tjWexGj*8I>n)TI4u+Bei?_@GHYw8O~7A(i_> zc-0nE{}vc$nB90fMbgWzJQYJDeA zxwPr2ngu~{G2lOWglH;ZQ`KmER}t%H(6*s4V^xH#Sv9_3BjMHMRoZ1}Yjy0ZqhQ2w zMJI=e-#3Cd178f1W5FLRZUSc8T#J#evZPzLM((p5&H_`;w|pKxxrK5mImFUjL$x*) z>#B?nOT74$5~hTD)@>Q%XJ%ay-&lLbv@w_T-#Tl}SSQfvxanc*&m!luD*KKEwA0#`b<6RAy%g+wt)7k)~Y+>=3T0_?#nnqjpSF`)xa}P zpV}d}D`U?nb!8kG=OM_rb1_plfrigbPsS;a7+9WIu!GcbEh$tglLs)>DXsQo922|b zoNAa+iD+9;UIQCYuz0tCX=D{k+pI6uE8~auYct-A4+JMrsNh3HN&jp^rUAy?2{ai{ 
zjZJeVuyK{$sn=1e{FaQL)3DnTG~2onj-;E0!9#TzT#7acV46GKk~H#HOau?}2bYu?X=zBHW)~k|l%mm&AHRCMhH*qdx)%io+Nu8CR<9M$IBH zZ82czfAfFfNjHiZ&Y=g?rWU64klN4u1`jRJ>4j65z@4B@G7*i4z6{)6$mDk8YY)E2 zCtCBaJn7J-*Gh|H^Cz{*Yx2K|ID<#2-+tTBEBj3~)CIDLYD)NN3(@`R=3 z!kDc?p$5F1fSCoOeLQ)c8^vS;4~}6`NJ_;fqrmUS!P}BMi}j04Kc8Zu=fJH_#bYVa zA&Kz2NPEPbL;RBj!}Jsjwj5$42{u-_3z2z6gN|S(ONXc$%|vyIQEnXZaF{RZQ(;a8 zBQ_7D2Eh^m-_aN%=D9h9vEq~iZ=6Cd+>9TjNrFwXx)FCo(TGIsLwYg^9C}9(k0YT@ z1{Cc>)sYqqqM?sFi1lbE`7s_HuZZ|p^F_3~+!$iU*JJqOe+f$mEJMH@2Smke?pn#Z zYi{tN)lv3rf8g1%>e*4;`P52V$#XbserWZ4e4u~j;9n#k9C&v1z_V)yhI0-1(;o(U z*DhZxzBE?4Jf1yWb~it8x39X}*W5ckZf5fC*H0JBtId52W_8ZG(_3r+R4coRzLRUt zQ)Pbxu9L%bN z$u|}*mK=M4Gq$=3uX%RObgt}hLII~|Vc^>{xu;eg?ZD4l-FJ`9A6>IHm)%Xd@m2TE ztf`vzk!f4m+q`sj@#2$TcNo6WB^zZEHTEx)kZ zeYmvqNGWhMd*-3l3+?^R$$WpQIaJ_QoA;Ld`{su}M%{L(aJw*3tUtJB>xV|81aV@a zYoW1dXW zbb0Y|?q+_b}w*fcILv~k}s4!_0Zi&W@GoVwlH1X|4h;M?3(l9Lzs}p zzUAP`mrIQ&vL_ezee3KaTf+le>#D6aul)rm=G~OP{SzN$@~j8;EuUV=lmh3nb!AiC zUHiO!A+cuK@yOaBEvo19mrB-d5V}+;&z7v6bA#*t?!tlP{VVJTo;CmZRh?(vGsSn4c7Nh1HQ8z+Mjpm&xVX3{U?(t4je5g?kc{n!E1x>j}a1ZV|u(7r~+jP zmL@!>4!97NYcD965o*%|j&>5~BZqK0{tcXt;C445ot)|-wYb=-^DXbFT?$NHV7ue! zJ0Y{`RIPI9z|kA4oc;uk!KNppaO$%rR*(N=&H(s4MScA>QwmOY@saa69c###ZqiWu zWCnO=WEz2cra3Qgylxq~8nQ}F8Jehu={xfo&6dy#=a|t*(D{sZOK8Q{D5I62e*~sH zMcG?zMRja|zxr?JXYyJ8;?AtdU4SdIVqTo^RdL9%6gs<+XH=uOIzLuioh|9xCu4B) z0Zf5acX~_i1uCj=2(ebQ9-(fgVYl;S>s!uk%EIvuWiA?cCR(D}tWG zvkN@`z|$EH%~Tk?uF=(moWUHh)f5*&I7bZMl z{>TGs$Evj>KfGoQ7A}APrFULh`O1gA=)h3X)tS8;PNc6cel;I1`Mabe>0@_Gj(uxz z%^fT}zp{U|_bB)(mfilGVewFQ@S($<)4geW&6M|*8oCODA2#fP?5$EgInSF7uQlX5 zOM&h}^uxeDL}hc`JvVs$#gO~gOc(_La78hmX++aPW zKgW8i_)W_`1`)gl2mj=)39a>;g@zeg5*akj2rh|0E*+<{1QaKfNpJm zU~ONuwy#-tlpU_DdEE>Oveq>-1B{QuefO*LU(JO}4(3D0&irxIU2cD1W@P*qv z*zRhpbkr&{P_02pa88s;YQ>T`?{Ev`^r>iyk=uwq;CQi&-2G5@qetA@MqvxR9)>$z zaJfwK(HoJ=h6J-5G~^mq9c@KZTiM_K zz~8y*?<{upuN*J=j}~>Fhem&~xn~(T;DZ2J3(N(TAJ9)4@EM@pwecBL%kp#de!y#i$VN01X3(RVYq>4RTM>9wD_Sv_UFnf? 
zk-q{j;;xKEb^^i{f*a6pFN!7<)zPj@9In$-vDDD?b>Y1b0BCq2qoCrN4%B{@)=&7zcmH!pPkDbvKB;PnO(I z&7FH_^_LwjMN`Wo@Ae1Y9jji*Ij~Y!@}8PI4-|cOc78T@1&l=K2DNT&&D#p&CF_%- z2rZTaarXA&?IN>hIaKn4km4!ZeRr?TU&}oOwkLbn+{uSlSJ}~8G_`^>=M4}O(+KP{ zp1!%K%l?iB{_a(O_b>fD^Hvb+o-bSL?jD>!`0X#{cD~v3TF>jDlC=vQroQzw$cGKR z;2~&hF6x@d*0Mpm!EKKm1x~V`RNW;q%KZSadT44=Hn>hHl&!BG))`$!ksgSgqa5#0 zm-MjUf})f~il=Arh%utW71$|g&;n``bOts1HE&ITq*1LKboIO^Lvi(6z`~_Tr7ncs z0uH2}4RAo;hV&0Eeuln*s>94+GI9gBcGThE&1jB^O@cSIMA7F&>QyKdsa1g-A&@+# zwH-oc{mAY^XlXvu2DcEU6cUw|ylB9eeYxA(nAj-jBgOhqVQ{Rw<7A0Zg7?5SuR&mtOp+#gY4E(%RGp?!N$*JQCL{5CKcB zlk)78sp|JpPTB6d`@;MSMPFB;tz_?&=)?8vZEe|0po!SNFt~7F)zVnd%?*^ns(WVs z%mNQqPh0EUiHBC_;-#|Fw`5tgNh%Fo^k2d-b3eh?kKqMp#a4@v$C)9Uy2!N!KD*|m zin#j#xq_>17$UIhEMIWdJ!;wh<_oXAP~3TNWpAnFNO|MuCwi^TP@%MX!)JF^6I4WY z*1z1mv%)2EV4NRy+uN#kV{k;Q!B4>+tJsRSfH0~c5I3Gs@XXq) zdZ-O`)`3n8eFHW7|p!s4BrBrKZKAD=@< zZ&XuAKczt~-T>i%yBV%J6o?^0j5m{@sNq436n|Gj=BSY96atWT?M-ABCWM3T-TC8}=4;UZQaS$lD0=yWO=Ayfc!g z*WLc5Ba27!+e_|_>>!BlbuEuTpuatTJ7>siN{;rduIy_IItzEnK|1 zuYzMbtU7!^zE_;aQT0;OBZr<)H1lF9d;@HK^-GQ^JF2$oOOis{FL@>uZMK{Oc(knh zJ&$~f$nE-E4-jt^5?WHj(}VL?`&%1A#sHoH7OAQDa3-Oj}m&;W{ zd0zPT>WHwujDDjp!sqzAsNC9rKcvbd1a7x}c2sN6F#L3oV#i?BPV&Fs3rF25tK8!a zYpYXE{r|eQz`n(wQnffxyH_CXH9}5}wF@~-kW*v*!mOz7i~s(yW>srY)tY9NFO7l= zYHU$^@33CbMHzq>1C1r{TsxF;GF4{{{lr)=+L}N+N~^Ge|BSraKaV~$Mh@Crg`R=i-s-Pl`}uUo#n1kp z82MS4VHGa~iq)53aMA|xC#TZep8<~uRDOkP{B#wuXyLCg!G*agEP^6%x(Vw+3mLKg zNJ0s5hk;B0BCgo(lQH~V4>W(_H@VN9HcG$ql=f6}iKjM$O0l%Lw^zCkmOlAqxEO|7 zeK0e@?c*djL+X+4v`Ihwg+CJ{UjT<26*9k9EfY*T;i>^JgV1n^149E@5^mY1xany6 zOie~nhTXs-5;<%Gm&kOT8NP6y5q)%m;I@N5PXSHhz+?;_(;+SeAq<`Y42tLyf)Zr9 z`NYMG7cTZQmz2hCc2`R8l|Y8cFNw*&4b}s$i+^KX6csMEjM>!Gflw`oL z$lo6Xd?GKw$pr3yjci7R6oLRBO|)11igYO%#DG2 z6CV}oj&WlkmnKrr;3p^8!Yn%ixg<@n5Fq^Kg*w8m$pgz1=OMzask9hq%olx!G7uB= z?DXUm+==Ajx4v|ElTvhP*WscQEjs9C&gJp-1iXS4bf+Qh&|#e-LrVBg6eC6E6B$y$ zfVQD1%dKH@0$w$(l)6dK6>(+A{(IyoZ@~1;QR{Z^1AE7+y`${u2e*zo!`w+w`01?= zbpBPHKW8d-o+;}5Yr3;#ou!&KNYYMhNIOK*4p-C6#jYbH?WmGgHo438=g?`S;Zu}b 
zK{F^!|50&kgH)PuCMQ4qYb=%w^w990BU~K%**a$``bfd}Y)?FR>ONIU4KlskU zW%ft0AH?o^{-*K$#**t?)&QP2ci6iV^AkB=fnKZYUNh}lH(BqR=1mK(JJv@XU2mWJ zvvYUYh5bv177wk}G0W`SsYgcZH)sB6X5G^O_Rqj;fmbeM&7c+%UEbFm;`Puy%YkKU zvHsAS?eMzIL73xb@0^uzzuJuGRSbXMcvF_TM=_NIK@asAG>b?cI@x9QKn=D#-)^xpPd)?g0 zU*Qj2t=k(x6Xh@2gOZp0x1R*9)k1jw?EJ}!5q_lxG!&n?;BflYNN&d~6Zr$HzF^VY zUD*EKz_R|`GlfITw~CgdYq~$EUVB*A8NWI7^`XU`*=K*DV{&15iFPH^2-t07iIEXd zwA{kgihBsx0(Qc6K_T7tBD2T4G_xJr+jD9fq7*i^H>j7ea zL3{6Dg@WiZPlA=y{ed6PJjRqt4|R+_PJg29rx}pkDkN-Z7Q>F97(R8FgpXi~_b+%B zB8!3S(N%rZV?7l5I8&F{XUd@C;4cbC)U< zh9Hsdf9I?izrXJ>Nvn)%F3|M0d`E@Ca9IjhhQ;su!;eXNC841@y2^b|e4=M)SJ_fm z(PHQe*2ey$4VOto(cuQeW)LT51@VsD%<3)wbJ#ZYZ%I1gSgT)jLThv mxT Date: Wed, 18 Mar 2026 19:33:21 +0800 Subject: [PATCH 10/13] fix api invoke --- .github/workflows/post_test_cases.yml | 48 ++++++++ flagos-user-tests/repos.yaml | 16 +-- .../inference/qwen3/demo_0_6b/README.md | 2 +- .../inference/qwen3/demo_0_6b/demo_0_6b.yaml | 2 +- flagos-user-tests/tools/collect_test_cases.py | 102 ++++++++++++++++ flagos-user-tests/tools/test_post_report.sh | 113 ++++++++++++++++++ 6 files changed, 273 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/post_test_cases.yml create mode 100644 flagos-user-tests/tools/collect_test_cases.py create mode 100755 flagos-user-tests/tools/test_post_report.sh diff --git a/.github/workflows/post_test_cases.yml b/.github/workflows/post_test_cases.yml new file mode 100644 index 0000000..cb1e5af --- /dev/null +++ b/.github/workflows/post_test_cases.yml @@ -0,0 +1,48 @@ +name: Post Test Cases Report + +on: + pull_request: + branches: [main] + types: [closed] + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + post-report: + if: ${{ github.event.pull_request.merged == true }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: 
pip install pyyaml + + - name: Collect test cases + run: python tools/collect_test_cases.py --output test_cases_report.json + + - name: Post report + uses: ./actions/post-benchmark-report + with: + backend_url: ${{ secrets.FLAGOPS_BACKEND_URL }} + api_token: ${{ secrets.FLAGOPS_API_TOKEN }} + report_path: flagos-user-tests/test_cases_report.json + list_code: flagops-user-test-cases + list_name: FlagOps User Test Cases + header_config: >- + [ + {"field": "case_id", "name": "用例ID", "required": true, "sortable": true, "type": "string"}, + {"field": "case_name", "name": "用例名称", "required": true, "sortable": false, "type": "string"}, + {"field": "repo", "name": "所属子仓库", "required": true, "sortable": true, "type": "string"}, + {"field": "updated_at", "name": "更新时间", "required": true, "sortable": true, "type": "string"} + ] + fail_on_error: "false" diff --git a/flagos-user-tests/repos.yaml b/flagos-user-tests/repos.yaml index fdde8e1..fe7f816 100644 --- a/flagos-user-tests/repos.yaml +++ b/flagos-user-tests/repos.yaml @@ -5,41 +5,41 @@ repositories: flagscale: - url: https://github.com/FlagOpen/FlagScale.git + url: https://github.com/flagos-ai/FlagScale.git default_branch: main description: Large-scale distributed training framework flaggems: - url: https://github.com/FlagOpen/FlagGems.git + url: https://github.com/flagos-ai/FlagGems.git default_branch: main description: GPU-accelerated math library flagcx: - url: https://github.com/FlagOpen/FlagCX.git + url: https://github.com/flagos-ai/FlagCX.git default_branch: main description: Cross-chip communication library flagtree: - url: https://github.com/FlagOpen/FlagTree.git + url: https://github.com/flagos-ai/FlagTree.git default_branch: main description: Tree-structured computation library vllm-fl: - url: https://github.com/FlagOpen/vLLM-FL.git + url: https://github.com/flagos-ai/vLLM-FL.git default_branch: main description: LLM inference engine vllm-plugin-fl: - url: https://github.com/FlagOpen/vLLM-plugin-FL.git + url: 
https://github.com/flagos-ai/vLLM-plugin-FL.git default_branch: main description: vLLM plugin system te-fl: - url: https://github.com/FlagOpen/TransformerEngine-FL.git + url: https://github.com/flagos-ai/TransformerEngine-FL.git default_branch: main description: Transformer Engine megatron-lm-fl: - url: https://github.com/FlagOpen/Megatron-LM-FL.git + url: https://github.com/flagos-ai/Megatron-LM-FL.git default_branch: main description: Megatron-LM fork diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md index b38f61b..491ebcd 100644 --- a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md @@ -15,7 +15,7 @@ Runs 4 prompts with greedy decoding (temperature=0, max_tokens=10) and verifies ## How to Run ```bash -git clone https://github.com/FlagOpen/FlagScale.git && cd FlagScale && pip install . +git clone https://github.com/flagos-ai/FlagScale.git && cd FlagScale && pip install . flagscale inference qwen3 --config ./conf/demo_0_6b.yaml ``` diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml index e266a48..1399352 100644 --- a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml @@ -20,7 +20,7 @@ env: CUDA_DEVICE_MAX_CONNECTIONS: "1" setup: - - git clone https://github.com/FlagOpen/FlagScale.git && cd FlagScale && pip install . + - git clone https://github.com/flagos-ai/FlagScale.git && cd FlagScale && pip install . 
run: - flagscale inference qwen3 --config ./conf/demo_0_6b.yaml --test diff --git a/flagos-user-tests/tools/collect_test_cases.py b/flagos-user-tests/tools/collect_test_cases.py new file mode 100644 index 0000000..365e3ce --- /dev/null +++ b/flagos-user-tests/tools/collect_test_cases.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +"""Collect all test cases and output a JSON report for post-benchmark-report action. + +Output format: + [ + { + "case_id": "tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml", + "case_name": "flagscale-inference-qwen3-demo_0_6b", + "repo": "flagscale", + "updated_at": "2026-03-18T15:02:29+08:00" + }, + ... + ] + +Usage: + python tools/collect_test_cases.py --root . --output report.json +""" + +import argparse +import json +import subprocess +import sys +from datetime import datetime, timezone +from pathlib import Path + +import yaml + + +def get_file_updated_time(filepath: Path) -> str: + """Get the last commit time of a file via git, fallback to mtime.""" + try: + result = subprocess.run( + ["git", "log", "-1", "--format=%aI", str(filepath)], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0 and result.stdout.strip(): + return result.stdout.strip() + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + # Fallback to file modification time + mtime = filepath.stat().st_mtime + return datetime.fromtimestamp(mtime, tz=timezone.utc).strftime("%Y/%m/%d %H:%M:%S") + + +def make_case_id(meta: dict) -> str: + """Generate a case ID from meta fields: ---.""" + parts = [ + meta.get("repo", "unknown"), + meta.get("task", ""), + meta.get("model", ""), + meta.get("case", ""), + ] + return "-".join(p for p in parts if p) + + +def collect_test_cases(root: Path) -> list: + """Discover all test cases and return report list.""" + tests_dir = root / "tests" + report = [] + + for yaml_path in sorted(tests_dir.rglob("*.yaml")): + if yaml_path.name.startswith("_") or yaml_path.name == "data.yaml": + continue + + 
try: + with open(yaml_path) as f: + data = yaml.safe_load(f) + if not isinstance(data, dict) or "meta" not in data: + continue + + meta = data["meta"] + report.append({ + "case_id": str(yaml_path.relative_to(root)), + "case_name": make_case_id(meta), + "repo": meta.get("repo", "unknown"), + "updated_at": get_file_updated_time(yaml_path), + }) + except (yaml.YAMLError, KeyError): + continue + + return report + + +def main(): + parser = argparse.ArgumentParser(description="Collect test cases for reporting") + parser.add_argument("--root", default=".", help="Root directory of flagos-user-tests") + parser.add_argument("--output", default="test_cases_report.json", help="Output JSON file") + args = parser.parse_args() + + root = Path(args.root) + report = collect_test_cases(root) + + with open(args.output, "w") as f: + json.dump(report, f, ensure_ascii=False, indent=2) + + print(f"Collected {len(report)} test case(s) -> {args.output}") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/test_post_report.sh b/flagos-user-tests/tools/test_post_report.sh new file mode 100755 index 0000000..3d7de1d --- /dev/null +++ b/flagos-user-tests/tools/test_post_report.sh @@ -0,0 +1,113 @@ +#!/bin/bash +# Local test script for posting test cases report. +# +# Usage: +# ./tools/test_post_report.sh [api_token] +# +# Example: +# ./tools/test_post_report.sh http://10.0.0.1:8080 +# ./tools/test_post_report.sh http://10.0.0.1:8080 my-secret-token + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +BACKEND_URL="${1:?Usage: $0 [api_token]}" +BACKEND_URL="${BACKEND_URL%/}" +API_TOKEN="${2:-}" + +LIST_CODE="flagops-user-test-cases" +LIST_NAME="FlagOps User Test Cases" +REPORT_PATH="$ROOT_DIR/test_cases_report.json" + +HEADER_CONFIG='[ + {"field": "case_id", "name": "用例ID", "required": true, "sortable": true, "type": "string"}, + {"field": "case_name", "name": "用例名称", "required": true, "sortable": false, "type": "string"}, + {"field": "repo", "name": "所属子仓库", "required": true, "sortable": true, "type": "string"}, + {"field": "updated_at", "name": "更新时间", "required": true, "sortable": true, "type": "string"} +]' + +# --- Step 1: Collect test cases --- +echo "=== Step 1: Collect test cases ===" +cd "$ROOT_DIR" +python tools/collect_test_cases.py --root . --output "$REPORT_PATH" +echo "Report content:" +cat "$REPORT_PATH" | python -m json.tool +echo "" + +# --- Step 2: Post header config --- +echo "=== Step 2: Post header config ===" +HEADER_PAYLOAD=$(jq -n \ + --arg list_code "$LIST_CODE" \ + --arg list_name "$LIST_NAME" \ + --argjson header_config "$HEADER_CONFIG" \ + '{list_code: $list_code, list_name: $list_name, header_config: $header_config}') + +echo "URL: ${BACKEND_URL}/flagcicd-backend/list/header" +echo "Payload:" +echo "$HEADER_PAYLOAD" | jq . 
+ +CURL_ARGS=(-s -X POST -w '\n%{http_code}' -H "Content-Type: application/json" -d "$HEADER_PAYLOAD") +[ -n "$API_TOKEN" ] && CURL_ARGS+=(-H "Authorization: Bearer $API_TOKEN") + +RESPONSE=$(curl "${CURL_ARGS[@]}" "${BACKEND_URL}/flagcicd-backend/list/header") +HTTP_STATUS=$(echo "$RESPONSE" | tail -n1) +RESPONSE_BODY=$(echo "$RESPONSE" | sed '$d') + +echo "HTTP status: $HTTP_STATUS" +echo "Response: $RESPONSE_BODY" +echo "" + +# --- Step 3: Post list data --- +echo "=== Step 3: Post list data ===" +COMMIT_ID="$(git rev-parse HEAD 2>/dev/null || echo 'unknown')" +REPO_NAME="flagos-ai/FlagOps" +WORKFLOW_ID="local-test" +RUN_ID="local-$$" + +DATA_PAYLOAD=$(jq -n \ + --arg repository_name "$REPO_NAME" \ + --slurpfile report "$REPORT_PATH" \ + '{ + items: [ $report[0][] | . + { + repository_name: $repository_name + } ] + }') + +echo "URL: ${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}" +echo "Items count: $(echo "$DATA_PAYLOAD" | jq '.items | length')" +echo "Payload (first item sample):" +echo "$DATA_PAYLOAD" | jq '{items_count: (.items | length), first_item: .items[0]}' + +CURL_ARGS=(-s -X POST -w '\n%{http_code}' -H "Content-Type: application/json" -d "$DATA_PAYLOAD") +[ -n "$API_TOKEN" ] && CURL_ARGS+=(-H "Authorization: Bearer $API_TOKEN") + +RESPONSE=$(curl "${CURL_ARGS[@]}" "${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}") +HTTP_STATUS=$(echo "$RESPONSE" | tail -n1) +RESPONSE_BODY=$(echo "$RESPONSE" | sed '$d') + +echo "HTTP status: $HTTP_STATUS" +echo "Response: $RESPONSE_BODY" +echo "" + +# --- Step 4: Query to verify --- +echo "=== Step 4: Query list data ===" +QUERY_URL="${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}?page_size=10&page=1&sort=created_at&order=desc" +echo "URL: $QUERY_URL" + +CURL_ARGS=(-s -X GET -w '\n%{http_code}' -H "Accept: application/json") +[ -n "$API_TOKEN" ] && CURL_ARGS+=(-H "Authorization: Bearer $API_TOKEN") + +RESPONSE=$(curl "${CURL_ARGS[@]}" "$QUERY_URL") +HTTP_STATUS=$(echo "$RESPONSE" | tail -n1) 
+RESPONSE_BODY=$(echo "$RESPONSE" | sed '$d') + +echo "HTTP status: $HTTP_STATUS" +echo "Response:" +echo "$RESPONSE_BODY" | jq . 2>/dev/null || echo "$RESPONSE_BODY" + +# Cleanup +rm -f "$REPORT_PATH" +echo "" +echo "=== Done ===" From 3b6d0820abf13b958b74fc930babf29b071710fa Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Wed, 18 Mar 2026 20:08:22 +0800 Subject: [PATCH 11/13] model path --- .../inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml index c91ac04..f1ce909 100644 --- a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml @@ -1,5 +1,5 @@ llm: - model: Qwen/Qwen3-0.6B + model: /home/gitlab-runner/data/Qwen3-0.6B trust_remote_code: true tensor_parallel_size: 1 pipeline_parallel_size: 1 From 8bda34ea273bc11a64761c65a831b4de0fde1dd6 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Thu, 19 Mar 2026 14:25:20 +0800 Subject: [PATCH 12/13] fix post api invoke --- flagos-user-tests/tools/collect_test_cases.py | 26 +++++++++++-------- flagos-user-tests/tools/test_post_report.sh | 19 +++++++++++--- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/flagos-user-tests/tools/collect_test_cases.py b/flagos-user-tests/tools/collect_test_cases.py index 365e3ce..e349a38 100644 --- a/flagos-user-tests/tools/collect_test_cases.py +++ b/flagos-user-tests/tools/collect_test_cases.py @@ -1,16 +1,15 @@ #!/usr/bin/env python3 """Collect all test cases and output a JSON report for post-benchmark-report action. 
-Output format: - [ - { - "case_id": "tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml", +Output format (object-of-objects, keyed by case_id): + { + "tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml": { "case_name": "flagscale-inference-qwen3-demo_0_6b", "repo": "flagscale", "updated_at": "2026-03-18T15:02:29+08:00" }, ... - ] + } Usage: python tools/collect_test_cases.py --root . --output report.json @@ -54,10 +53,15 @@ def make_case_id(meta: dict) -> str: return "-".join(p for p in parts if p) -def collect_test_cases(root: Path) -> list: - """Discover all test cases and return report list.""" +def collect_test_cases(root: Path) -> dict: + """Discover all test cases and return report dict keyed by case_id. + + The post-benchmark-report action expects an object-of-objects format where: + - Each key maps to header_config[0].field (case_id) + - Each value is an object with fields matching header_config[1+] + """ tests_dir = root / "tests" - report = [] + report = {} for yaml_path in sorted(tests_dir.rglob("*.yaml")): if yaml_path.name.startswith("_") or yaml_path.name == "data.yaml": @@ -70,12 +74,12 @@ def collect_test_cases(root: Path) -> list: continue meta = data["meta"] - report.append({ - "case_id": str(yaml_path.relative_to(root)), + case_id = str(yaml_path.relative_to(root)) + report[case_id] = { "case_name": make_case_id(meta), "repo": meta.get("repo", "unknown"), "updated_at": get_file_updated_time(yaml_path), - }) + } except (yaml.YAMLError, KeyError): continue diff --git a/flagos-user-tests/tools/test_post_report.sh b/flagos-user-tests/tools/test_post_report.sh index 3d7de1d..2783e36 100755 --- a/flagos-user-tests/tools/test_post_report.sh +++ b/flagos-user-tests/tools/test_post_report.sh @@ -68,11 +68,24 @@ RUN_ID="local-$$" DATA_PAYLOAD=$(jq -n \ --arg repository_name "$REPO_NAME" \ + --arg workflow_id "$WORKFLOW_ID" \ + --arg commit_id "$COMMIT_ID" \ + --arg run_id "$RUN_ID" \ + --argjson header_config "$HEADER_CONFIG" \ --slurpfile 
report "$REPORT_PATH" \ '{ - items: [ $report[0][] | . + { - repository_name: $repository_name - } ] + items: [ $report[0] | to_entries[] | . as $entry | + ([ $header_config | to_entries[] | .value.field as $f | + if .key == 0 then {($f): $entry.key} + else {($f): $entry.value[$f]} + end + ] | add) + { + commit_id: $commit_id, + repository_name: $repository_name, + workflow_id: $workflow_id, + run_id: $run_id + } + ] }') echo "URL: ${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}" From 96244cf6975cc4ebf4ea7b2149c11299aeb860e0 Mon Sep 17 00:00:00 2001 From: liyuzhuo Date: Thu, 19 Mar 2026 19:46:00 +0800 Subject: [PATCH 13/13] support manual trigger --- .github/workflows/post_test_cases.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/post_test_cases.yml b/.github/workflows/post_test_cases.yml index cb1e5af..6e5f243 100644 --- a/.github/workflows/post_test_cases.yml +++ b/.github/workflows/post_test_cases.yml @@ -4,6 +4,7 @@ on: pull_request: branches: [main] types: [closed] + workflow_dispatch: defaults: run: @@ -11,7 +12,7 @@ defaults: jobs: post-report: - if: ${{ github.event.pull_request.merged == true }} + if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true }} runs-on: ubuntu-latest steps: - name: Checkout