diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..ce1bbdd --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,42 @@ +# FlagOS DevOps - Code Owners + +# Default owners for everything +* @flagos-ai/devops-team + +# CI/CD workflows +.github/ @flagos-ai/devops-team + +# Shared actions +actions/ @flagos-ai/devops-team + +# === User Tests === + +# FlagScale test cases +flagos-user-tests/tests/flagscale/ @flagos-ai/flagscale-team + +# FlagGems test cases +flagos-user-tests/tests/flaggems/ @flagos-ai/flaggems-team + +# FlagCX test cases +flagos-user-tests/tests/flagcx/ @flagos-ai/flagcx-team + +# FlagTree test cases +flagos-user-tests/tests/flagtree/ @flagos-ai/flagtree-team + +# vLLM-FL test cases +flagos-user-tests/tests/vllm-fl/ @flagos-ai/vllm-team + +# vLLM-plugin-FL test cases +flagos-user-tests/tests/vllm-plugin-fl/ @flagos-ai/vllm-team + +# TE-FL test cases +flagos-user-tests/tests/te-fl/ @flagos-ai/te-team + +# Megatron-LM-FL test cases +flagos-user-tests/tests/megatron-lm-fl/ @flagos-ai/megatron-team + +# Experimental test cases +flagos-user-tests/tests/experimental/ @flagos-ai/devops-team + +# Validation tools +flagos-user-tests/tools/ @flagos-ai/devops-team diff --git a/.github/ISSUE_TEMPLATE/new_test_case.yml b/.github/ISSUE_TEMPLATE/new_test_case.yml new file mode 100644 index 0000000..899c5b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/new_test_case.yml @@ -0,0 +1,94 @@ +name: New Test Case Submission +description: Submit a new test case for FlagOS repositories +title: "[Test Case] " +labels: ["new-test-case"] +body: + - type: dropdown + id: target-repo + attributes: + label: Target Repository + description: Which FlagOS repository is this test case for? + options: + - FlagScale + - FlagGems + - FlagCX + - FlagTree + - vLLM-FL + - vLLM-plugin-FL + - TE-FL + - Megatron-LM-FL + validations: + required: true + + - type: dropdown + id: test-type + attributes: + label: Test Type + description: What type of test is this? 
+ options: + - train + - inference + - hetero_train + - unit + - integration + - benchmark + validations: + required: true + + - type: input + id: model-name + attributes: + label: Model Name + description: Name of the model being tested (if applicable) + placeholder: e.g., llama2, mixtral, deepseek + + - type: textarea + id: description + attributes: + label: Test Case Description + description: Describe what this test case validates + placeholder: | + This test case validates ... + validations: + required: true + + - type: textarea + id: config + attributes: + label: Configuration + description: Paste the YAML configuration for the test case + render: yaml + validations: + required: true + + - type: textarea + id: gold-values + attributes: + label: Gold Values + description: Paste the expected gold values (JSON format) + render: json + + - type: textarea + id: environment + attributes: + label: Environment Requirements + description: Describe the hardware/software requirements + placeholder: | + - GPU: 8x A100 80GB + - CUDA: 12.1 + - Python: 3.10 + validations: + required: true + + - type: checkboxes + id: checklist + attributes: + label: Submission Checklist + options: + - label: I have tested this test case locally + required: true + - label: I have included gold values (if applicable) + - label: I have added a README.md with test description + required: true + - label: My YAML configuration follows the schema specification + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..4f688bf --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,37 @@ +## Test Case PR + +### Target Repository + +- [ ] FlagScale +- [ ] FlagGems +- [ ] FlagCX +- [ ] FlagTree +- [ ] vLLM-FL +- [ ] vLLM-plugin-FL +- [ ] TE-FL +- [ ] Megatron-LM-FL + +### Test Type + +- [ ] train +- [ ] inference +- [ ] hetero_train +- [ ] unit +- [ ] integration + +### Description + + + +### Environment Requirements + 
+- GPU: +- CUDA: +- Python: + +### Checklist +- [ ] YAML configuration passes schema validation +- [ ] Gold values are included (if applicable) +- [ ] README.md is present for each test case +- [ ] Test case has been verified locally +- [ ] No sensitive data (tokens, passwords, private paths) in configs diff --git a/.github/scripts/detect_changed_repos.js b/.github/scripts/detect_changed_repos.js new file mode 100644 index 0000000..08ea150 --- /dev/null +++ b/.github/scripts/detect_changed_repos.js @@ -0,0 +1,65 @@ +// Detect which repos have changed test cases. +// +// Outputs (via core.setOutput): +// changed_cases — JSON array of case paths (manual single-case dispatch) +// changed_repos — JSON object {repo, task, model} (manual repo dispatch or _none_) +// changed_repos_list — JSON array of repo names (auto-detected from PR/push) +// +// Called from workflow via: +// uses: actions/github-script@v7 +// with: +// script: | +// const run = require('./.github/scripts/detect_changed_repos.js'); +// await run({ github, context, core }); + +module.exports = async ({ github, context, core }) => { + const inputCase = process.env.INPUT_CASE || ''; + const inputRepo = process.env.INPUT_REPO || ''; + const inputTask = process.env.INPUT_TASK || ''; + const inputModel = process.env.INPUT_MODEL || ''; + + // Manual dispatch — single case + if (inputCase) { + core.setOutput('changed_cases', JSON.stringify([inputCase])); + return; + } + + // Manual dispatch — by repo + if (inputRepo) { + core.setOutput('changed_repos', JSON.stringify({ + repo: inputRepo, + task: inputTask, + model: inputModel, + })); + return; + } + + // Auto-detect from changed files + let files = []; + if (context.eventName === 'pull_request') { + const resp = await github.paginate( + github.rest.pulls.listFiles, + { owner: context.repo.owner, repo: context.repo.repo, pull_number: context.issue.number } + ); + files = resp.map(f => f.filename); + } else { + const resp = await 
github.rest.repos.compareCommits({ + owner: context.repo.owner, repo: context.repo.repo, + base: context.payload.before, head: context.payload.after, + }); + files = resp.data.files.map(f => f.filename); + } + + // Extract unique repos from changed paths + const repos = new Set(); + for (const f of files) { + const m = f.match(/^flagos-user-tests\/tests\/([^/]+)\//); + if (m && m[1] !== 'experimental') repos.add(m[1]); + } + + if (repos.size === 0) { + core.setOutput('changed_repos', JSON.stringify({ repo: '_none_' })); + } else { + core.setOutput('changed_repos_list', JSON.stringify([...repos])); + } +}; diff --git a/.github/workflows/nightly_integration.yml b/.github/workflows/nightly_integration.yml new file mode 100644 index 0000000..9b51668 --- /dev/null +++ b/.github/workflows/nightly_integration.yml @@ -0,0 +1,94 @@ +name: Nightly Integration Test - User Tests + +on: + schedule: + - cron: "0 2 * * *" + workflow_dispatch: + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + discover-cases: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.resolve.outputs.matrix }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml + + - name: Discover all test cases and resolve runner labels + id: resolve + working-directory: flagos-user-tests + run: | + python3 -c " + import json, os, sys + sys.path.insert(0, 'tools') + from run_user_tests import list_test_resources + from pathlib import Path + + root = Path('.') + resources_list = list_test_resources(root) + + matrix_entries = [] + for entry in resources_list: + matrix_entries.append({ + 'case_path': entry['case_path'], + 'runner_labels': json.dumps(entry['runner_labels']), + }) + + if not matrix_entries: + matrix_entries.append({ + 'case_path': '_none_', + 'runner_labels': json.dumps(['ubuntu-latest']), + }) + + matrix = {'include': matrix_entries} 
+ output = json.dumps(matrix) + print(f'Matrix: {output}') + with open(os.environ['GITHUB_OUTPUT'], 'a') as f: + f.write(f'matrix={output}\n') + " + + run-tests: + needs: discover-cases + if: ${{ !contains(needs.discover-cases.outputs.matrix, '_none_') }} + runs-on: ${{ fromJson(matrix.runner_labels) }} + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.discover-cases.outputs.matrix) }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install runner dependencies + run: pip install pyyaml + + - name: Run test case + run: python tools/run_user_tests.py --case ${{ matrix.case_path }} + + notify: + needs: run-tests + if: always() + runs-on: ubuntu-latest + steps: + - name: Generate summary + run: | + echo "## Nightly Integration Test Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Run: ${{ github.run_number }}" >> $GITHUB_STEP_SUMMARY + echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/post_test_cases.yml b/.github/workflows/post_test_cases.yml new file mode 100644 index 0000000..6e5f243 --- /dev/null +++ b/.github/workflows/post_test_cases.yml @@ -0,0 +1,49 @@ +name: Post Test Cases Report + +on: + pull_request: + branches: [main] + types: [closed] + workflow_dispatch: + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + post-report: + if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml + + - name: Collect test cases + run: python tools/collect_test_cases.py --output test_cases_report.json + + - name: Post report + uses: ./actions/post-benchmark-report + with: + 
backend_url: ${{ secrets.FLAGOPS_BACKEND_URL }} + api_token: ${{ secrets.FLAGOPS_API_TOKEN }} + report_path: flagos-user-tests/test_cases_report.json + list_code: flagops-user-test-cases + list_name: FlagOps User Test Cases + header_config: >- + [ + {"field": "case_id", "name": "用例ID", "required": true, "sortable": true, "type": "string"}, + {"field": "case_name", "name": "用例名称", "required": true, "sortable": false, "type": "string"}, + {"field": "repo", "name": "所属子仓库", "required": true, "sortable": true, "type": "string"}, + {"field": "updated_at", "name": "更新时间", "required": true, "sortable": true, "type": "string"} + ] + fail_on_error: "false" diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml new file mode 100644 index 0000000..0083b57 --- /dev/null +++ b/.github/workflows/pr_validation.yml @@ -0,0 +1,85 @@ +name: PR Validation - User Tests + +on: + pull_request: + branches: [main] + paths: + - "flagos-user-tests/**" + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml jsonschema + + # Get the actual list of changed files in the PR (github.event.pull_request.changed_files is just a count) + - name: Get changed files + id: changed + uses: actions/github-script@v7 + with: + script: | + const files = await github.paginate( + github.rest.pulls.listFiles, + { owner: context.repo.owner, repo: context.repo.repo, pull_number: context.issue.number } + ); + const changed = files + .map(f => f.filename) + .filter(f => f.startsWith('flagos-user-tests/')) + .map(f => f.replace('flagos-user-tests/', '')); + core.setOutput('files', changed.join(',')); + + # Step 1: Schema validation — only validate changed files + - name: Validate YAML/JSON Schema + run: | + python 
tools/validators/validate_config.py \ + --changed-files "${{ steps.changed.outputs.files }}" + + # Step 2: Required fields check + - name: Check Required Fields + run: python tools/validators/lint_test_case.py --strict + + # Step 3: Gold values format validation + - name: Validate Gold Values + run: python tools/validators/validate_gold_values.py + + # Step 4: Documentation completeness check + - name: Check Documentation + run: | + errors=0 + # Skip sub-config directories (conf/train/data etc.) + SUB_CONFIG_DIRS="conf train inference data" + for dir in $(find tests -mindepth 3 -maxdepth 5 -type d); do + dirname=$(basename "$dir") + # Skip sub-config directories + skip=false + for sub in $SUB_CONFIG_DIRS; do + if [ "$dirname" = "$sub" ]; then skip=true; break; fi + done + if [ "$skip" = "true" ]; then continue; fi + + # If the directory contains .yaml files, check for README.md + if ls "$dir"/*.yaml 1>/dev/null 2>&1; then + if [ ! -f "$dir/README.md" ]; then + echo "ERROR: Missing README.md in $dir" + errors=$((errors + 1)) + fi + fi + done + if [ $errors -gt 0 ]; then + echo "Found $errors test case directories without README.md" + exit 1 + fi + echo "All test case directories have README.md" diff --git a/.github/workflows/test_dispatch.yml b/.github/workflows/test_dispatch.yml new file mode 100644 index 0000000..c5253bd --- /dev/null +++ b/.github/workflows/test_dispatch.yml @@ -0,0 +1,114 @@ +name: Test Dispatch - User Tests + +on: + push: + branches: [main] + paths: + - "flagos-user-tests/tests/**" + pull_request: + branches: [main] + paths: + - "flagos-user-tests/tests/**" + workflow_dispatch: + inputs: + repo: + description: "Target repository (e.g., flagscale, flaggems)" + required: false + type: string + task: + description: "Task type (train/inference/hetero_train)" + required: false + type: string + model: + description: "Model name (e.g., mixtral, deepseek)" + required: false + type: string + case: + description: "Specific test case YAML path (relative 
to flagos-user-tests/)" + required: false + type: string + +defaults: + run: + working-directory: flagos-user-tests + +jobs: + detect-changes: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.resolve.outputs.matrix }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install pyyaml + + - name: Detect changed repos + id: detect + uses: actions/github-script@v7 + env: + INPUT_CASE: ${{ inputs.case }} + INPUT_REPO: ${{ inputs.repo }} + INPUT_TASK: ${{ inputs.task }} + INPUT_MODEL: ${{ inputs.model }} + with: + script: | + const run = require('./.github/scripts/detect_changed_repos.js'); + await run({ github, context, core }); + + - name: Resolve resources to matrix + id: resolve + working-directory: flagos-user-tests + run: | + python tools/resolve_matrix.py \ + --changed-cases '${{ steps.detect.outputs.changed_cases }}' \ + --changed-repos '${{ steps.detect.outputs.changed_repos }}' \ + --changed-repos-list '${{ steps.detect.outputs.changed_repos_list }}' + + run-tests: + name: ${{ matrix.repo }}/${{ matrix.task }}/${{ matrix.model }} + needs: detect-changes + if: ${{ needs.detect-changes.outputs.matrix != '' && !contains(needs.detect-changes.outputs.matrix, '_none_') }} + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.detect-changes.outputs.matrix) }} + runs-on: ${{ fromJson(matrix.runner_labels) }} + container: + image: ${{ matrix.container_image }} + options: ${{ matrix.container_options }} + volumes: ${{ fromJson(matrix.container_volumes) }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install runner dependencies + shell: bash + run: | + if [ -n "${{ matrix.conda_env }}" ]; then + source tools/activate_conda.sh ${{ matrix.conda_env }} + fi + pip install pyyaml + + - name: Run user tests + shell: bash + run: | + if [ -n "${{ matrix.conda_env }}" ]; then + source 
tools/activate_conda.sh ${{ matrix.conda_env }} + fi + ARGS="" + if [ -n "${{ matrix.case_path }}" ]; then + ARGS="--case ${{ matrix.case_path }}" + else + ARGS="--repo ${{ matrix.repo }}" + [ -n "${{ matrix.task }}" ] && ARGS="$ARGS --task ${{ matrix.task }}" + [ -n "${{ matrix.model }}" ] && ARGS="$ARGS --model ${{ matrix.model }}" + fi + python tools/run_user_tests.py $ARGS diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..71bc36f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/flagos-user-tests/CONTRIBUTING.md b/flagos-user-tests/CONTRIBUTING.md new file mode 100644 index 0000000..2c606b2 --- /dev/null +++ b/flagos-user-tests/CONTRIBUTING.md @@ -0,0 +1,76 @@ +# Contributing to FlagOS User Tests + +Thank you for contributing test cases to the FlagOS ecosystem! + +## How to Submit a Test Case + +### Step 1: Generate a Template + +Use the built-in generator to create a properly structured test case: + +```bash +# FlagScale training test case +python tools/generators/create_test_template.py \ + --repo flagscale \ + --type train \ + --model <model> \ + --name <case_name> + +# Other repositories +python tools/generators/create_test_template.py \ + --repo <repo> \ + --name <case_name> +``` + +### Step 2: Complete the Test Case + +1. **Edit the YAML config** with your actual test parameters +2. **Add gold values** from a verified local run (JSON format) +3. **Complete the README.md** with: + - Description of what the test validates + - Environment requirements (GPU, CUDA, Python) + - Manual execution instructions + +### Step 3: Validate Locally + +```bash +python tools/validators/validate_config.py +python tools/validators/validate_gold_values.py +python tools/validators/lint_test_case.py --strict +``` + +### Step 4: Submit a Pull Request + +1. Fork this repository +2. Create a feature branch: `git checkout -b add-test/<repo>/<case_name>` +3. Add your test case files +4. Commit and push +5. 
Open a Pull Request using the provided template + +## Test Case Requirements + +- Each test case must be in its own directory +- Each directory must contain: + - At least one `.yaml` configuration file + - A `README.md` with test documentation + - Gold values JSON file (for regression tests) +- No sensitive data (tokens, passwords, private paths) in any files +- YAML must pass schema validation +- Gold values must contain numeric arrays + +## Code Review + +- PRs are reviewed by the respective team CODEOWNERS +- CI must pass before merge +- At least one approval from a maintainer is required + +## Experimental Test Cases + +If your test case covers a new or unstable feature: +- Place it under `tests/experimental/` +- It will only run in nightly integration tests +- It will not block PR merges + +## Questions? + +Open an issue using the "New Test Case" template or contact the DevOps team. diff --git a/flagos-user-tests/README.md b/flagos-user-tests/README.md new file mode 100644 index 0000000..9ae4f32 --- /dev/null +++ b/flagos-user-tests/README.md @@ -0,0 +1,69 @@ +# FlagOS User Tests + +User-perspective test cases for FlagOS repositories. Each test case defines its own setup, run, and verification — exactly as a real user would operate. + +## How It Works + +``` +User submits test case YAML: + setup: [pip install flagscale] + run: [flagscale train mixtral --config ./conf/xxx.yaml] + verify: {log_path: ..., gold_values_path: ...} + +CI runner: + 1. cd + 2. Execute setup commands + 3. Execute run commands + 4. Extract metrics from log + 5. Compare against gold values → PASS/FAIL +``` + +Users have full control — the runner does NOT call internal repo scripts. 
+ +## Quick Start + +```bash +# Generate template +python tools/generators/create_test_template.py \ + --repo flagscale --type train --model llama2 --name tp2_pp1 + +# Validate +python tools/validators/validate_config.py +python tools/validators/validate_gold_values.py +python tools/validators/lint_test_case.py --strict + +# Run locally +python tools/run_user_tests.py \ + --case tests/flagscale/train/llama2/tp2_pp1/tp2_pp1.yaml +``` + +See [docs/getting_started.md](docs/getting_started.md) for the full guide. + +## Test Case Structure (FlagScale Example) + +``` +tests/flagscale/train/mixtral/tp2_pp1_ep2/ +├── tp2_pp1_ep2.yaml # Test case: setup → run → verify +├── conf/ # FlagScale configs (user provides) +│ ├── tp2_pp1_ep2.yaml +│ └── train/tp2_pp1_ep2.yaml +├── gold_values/ # Expected metrics +│ └── tp2_pp1_ep2.json +└── README.md +``` + +## Supported Repositories + +FlagScale, FlagGems, FlagCX, FlagTree, vLLM-FL, vLLM-plugin-FL, TE-FL, Megatron-LM-FL + +## CI Workflows (in `../.github/workflows/`) + +| Workflow | Trigger | Description | +|---|---|---| +| PR Validation | Pull Request | Format, lint, gold values checks | +| Test Dispatch | Push/PR | Run user-defined setup → run → verify | +| Nightly | Daily 02:00 UTC | All test cases | + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md). diff --git a/flagos-user-tests/docs/getting_started.md b/flagos-user-tests/docs/getting_started.md new file mode 100644 index 0000000..1025909 --- /dev/null +++ b/flagos-user-tests/docs/getting_started.md @@ -0,0 +1,99 @@ +# Getting Started + +## Overview + +`flagos-user-tests` manages **user-perspective** test cases for FlagOS repositories. Each test case defines its own setup, run, and verification commands — exactly as a real user would operate. + +## Quick Start + +### 1. 
Generate a template + +```bash +# FlagScale training test +python tools/generators/create_test_template.py \ + --repo flagscale --type train --model llama2 --name tp2_pp1 + +# Generic test +python tools/generators/create_test_template.py \ + --repo flaggems --name my_operator_test +``` + +### 2. Edit the generated files + +The test case YAML defines the user workflow: + +```yaml +# tests/flagscale/train/llama2/tp2_pp1/tp2_pp1.yaml +meta: + repo: flagscale + task: train + model: llama2 + case: tp2_pp1 + description: "LLaMA2 training with TP=2, PP=1" + +resources: + gpu: A100-80GB + gpu_count: 8 + +setup: + - pip install flagscale # user installs the package + +run: + - flagscale train llama2 --config ./conf/tp2_pp1.yaml # user runs training + +verify: + log_path: ".../stdout.log" # where to find output + gold_values_path: ./gold_values/tp2_pp1.json # expected metrics +``` + +Also edit the FlagScale config files (`conf/*.yaml`) and fill in gold values from a verified run. + +### 3. Validate locally + +```bash +python tools/validators/validate_config.py +python tools/validators/validate_gold_values.py +python tools/validators/lint_test_case.py --strict +``` + +### 4. Run locally (optional) + +```bash +python tools/run_user_tests.py \ + --case tests/flagscale/train/llama2/tp2_pp1/tp2_pp1.yaml +``` + +### 5. Submit a PR + +CI will automatically: +1. Validate format (PR Validation workflow) +2. Run your test case on real hardware (Test Dispatch workflow) + +## How the Runner Works + +`run_user_tests.py` is a **generic executor**: + +``` +┌─────────────┐ ┌──────────────────────────────────────┐ +│ Test Case │ ──▶ │ 1. cd │ +│ YAML │ │ 2. Execute setup commands │ +│ │ │ 3. Execute run commands │ +│ │ │ 4. Find log file (glob pattern) │ +│ │ │ 5. Extract metrics from log │ +│ │ │ 6. Compare against gold values │ +└─────────────┘ └──────────────────────────────────────┘ +``` + +It does **not** call any internal repo scripts. 
Users have full control over: +- What to install (`setup`) +- How to run (`run`) +- What to verify (`verify`) +- Machine requirements (`resources`) — mapped to runner labels via `resource_map.yaml` + +## CI Workflows + +| Workflow | Trigger | Description | +|---|---|---| +| PR Validation | Pull Request | Format/lint/gold-values checks | +| Test Dispatch | Push to main / PR | Runs user-defined setup → run → verify | +| Nightly | Daily 02:00 UTC | All test cases across all repos | diff --git a/flagos-user-tests/docs/test_format_spec.md b/flagos-user-tests/docs/test_format_spec.md new file mode 100644 index 0000000..a916e0d --- /dev/null +++ b/flagos-user-tests/docs/test_format_spec.md @@ -0,0 +1,177 @@ +# Test Format Specification + +## Core Concept: User-Perspective Test Cases + +Every test case is a **self-contained YAML file** that defines the complete workflow from a **user's perspective**: + +```yaml +meta: # What is this test? +resources: # Hardware requirements (platform, device, device_count) +setup: # How to install? (user's commands) +run: # How to run? (user's commands) +verify: # How to check? (gold values comparison) +``` + +The runner (`run_user_tests.py`) simply executes these user-defined commands. It does NOT call any internal repo scripts — giving users full control and matching real usage scenarios. 
+ +## Test Case YAML Format + +### Complete Example (FlagScale) + +```yaml +meta: + repo: flagscale + task: train + model: mixtral + case: tp2_pp1_ep2 + description: "Mixtral MoE training with TP=2, PP=1, EP=2" + +resources: + platform: cuda + device: A100-80GB + device_count: 8 + +env: + CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7" + +setup: + - pip install flagscale + +run: + - flagscale train mixtral --config ./conf/tp2_pp1_ep2.yaml + +verify: + log_path: "tests/functional_tests/train/mixtral/test_results/tp2_pp1_ep2/logs/details/host_0_localhost/*/default_*/attempt_0/*/stdout.log" + gold_values_path: ./gold_values/tp2_pp1_ep2.json + rtol: 1e-5 + atol: 0 +``` + +### Complete Example (Generic) + +```yaml +meta: + repo: flaggems + case: my_operator_test + description: "Test custom operator correctness" + +setup: + - pip install flaggems + +run: + - pytest -v tests/test_my_operator.py + +# No verify step — pytest exit code determines pass/fail +``` + +### Field Reference + +| Field | Type | Required | Description | +|---|---|---|---| +| `meta.repo` | string | Yes | Target FlagOS repository name | +| `meta.task` | string | No | Task type (train/inference/hetero_train) | +| `meta.model` | string | No | Model name | +| `meta.case` | string | No | Case name (for filtering) | +| `meta.description` | string | Yes | What this test validates | +| `resources` | object | No | Hardware requirements | +| `resources.platform` | string | No | Chip platform: `cuda`, `metax`, `ascend` (default: `cuda`) | +| `resources.device` | string | No | Device type (e.g. `A100-40GB`, `C500`, `Ascend910B`) | +| `resources.device_count` | int | No | Number of devices required | +| `env` | object | No | Environment variables | + +### Resource Resolution + +The `resources` field drives CI decisions via `resource_map.yaml` (platform-based): + +1. **Runner selection**: `resources.platform` + `resources.device` -> platform-specific runner labels +2. 
**Container image**: `resources.platform` + `meta.repo/task` -> platform-specific Docker image +3. **Container options**: `resources.platform` -> device passthrough flags (`--gpus all`, `--device /dev/davinci_all`, etc.) + +Supported platforms: + +| Platform | Vendor | Devices | Status | +|---|---|---|---| +| `cuda` | NVIDIA | A100, H100, H800 | Active | +| `metax` | MetaX (Muxi) | C500 | Planned | +| `ascend` | Huawei | Ascend910B, Ascend910C | Planned | + +The test job runs inside the platform-resolved Docker container with device access. + +### Field Reference (continued) + +| Field | Type | Required | Description | +|---|---|---|---| +| `setup` | list[str] | No | Shell commands for environment setup | +| `run` | list[str] | Yes | Shell commands to execute the test | +| `verify.log_path` | string | No | Path to output log (supports glob patterns) | +| `verify.gold_values_path` | string | No | Path to gold values JSON file | +| `verify.gold_values` | object | No | Inline gold values (alternative to file) | +| `verify.rtol` | float | No | Relative tolerance (default: 1e-5) | +| `verify.atol` | float | No | Absolute tolerance (default: 0) | + +### Working Directory + +All commands execute with the **test case directory** as the working directory. So `./conf/tp2_pp1_ep2.yaml` resolves relative to where the test case YAML lives. + +## Gold Values Format + +### Numeric (default) + +```json +{ + "lm loss:": { + "values": [11.17587, 11.16908, 10.41927] + } +} +``` + +- Keys are metric names extracted from log files +- Values are numeric arrays +- Comparison uses `rtol` / `atol` similar to `numpy.allclose` +- `log_path` supports glob patterns for timestamp directories + +### Text + +```json +{ + "inference_output": { + "type": "text", + "pattern": "output\\.outputs\\[0\\]\\.text=(?:\"(.+?)\"$|'(.+?)'$)", + "values": [ + " Lina. I'm a 22-year", + " the same as the president of the United Nations." 
+ ] + } +} +``` + +- Set `"type": "text"` to enable text comparison +- `"pattern"` is a regex with capture group(s) to extract text from log lines + - If multiple groups (e.g. alternation), the first non-None group is used +- Values are compared with exact string match + +## FlagScale Test Case Directory Structure + +``` +tests/flagscale/train/mixtral/tp2_pp1_ep2/ +├── tp2_pp1_ep2.yaml # Test case definition (setup/run/verify) +├── conf/ +│ ├── tp2_pp1_ep2.yaml # FlagScale experiment config (Hydra) +│ └── train/ +│ └── tp2_pp1_ep2.yaml # Training parameters +├── gold_values/ +│ └── tp2_pp1_ep2.json # Expected metrics +└── README.md +``` + +The user runs: `pip install flagscale && flagscale train mixtral --config ./conf/tp2_pp1_ep2.yaml` + +## README Requirements + +Each test case directory must have a `README.md` with: +1. **Description** section +2. **Environment** section + +## Experimental Test Cases + +Place under `tests/experimental/` for gray-stage tests (nightly only, non-blocking). diff --git a/flagos-user-tests/repos.yaml b/flagos-user-tests/repos.yaml new file mode 100644 index 0000000..fe7f816 --- /dev/null +++ b/flagos-user-tests/repos.yaml @@ -0,0 +1,45 @@ +# FlagOS target repository configuration +# +# Note: Each test case defines its own setup/run/verify workflow (user perspective). +# This file only records basic repository info for CI repo-level filtering and issue templates. 
+ +repositories: + flagscale: + url: https://github.com/flagos-ai/FlagScale.git + default_branch: main + description: Large-scale distributed training framework + + flaggems: + url: https://github.com/flagos-ai/FlagGems.git + default_branch: main + description: GPU-accelerated math library + + flagcx: + url: https://github.com/flagos-ai/FlagCX.git + default_branch: main + description: Cross-chip communication library + + flagtree: + url: https://github.com/flagos-ai/FlagTree.git + default_branch: main + description: Tree-structured computation library + + vllm-fl: + url: https://github.com/flagos-ai/vLLM-FL.git + default_branch: main + description: LLM inference engine + + vllm-plugin-fl: + url: https://github.com/flagos-ai/vLLM-plugin-FL.git + default_branch: main + description: vLLM plugin system + + te-fl: + url: https://github.com/flagos-ai/TransformerEngine-FL.git + default_branch: main + description: Transformer Engine + + megatron-lm-fl: + url: https://github.com/flagos-ai/Megatron-LM-FL.git + default_branch: main + description: Megatron-LM fork diff --git a/flagos-user-tests/resource_map.yaml b/flagos-user-tests/resource_map.yaml new file mode 100644 index 0000000..30f5f75 --- /dev/null +++ b/flagos-user-tests/resource_map.yaml @@ -0,0 +1,104 @@ +# Maps test case resource requirements to GitHub Actions runner labels and container images. +# +# Architecture: platform-based multi-vendor support +# resources.platform -> platforms. -> runner labels, container images, options +# +# Example test case YAML: +# resources: +# platform: cuda +# device: A100-40GB +# device_count: 1 +# +# Resolution chain: +# 1. resources.platform -> platforms.cuda +# 2. platforms.cuda.device_labels["A100-40GB"] -> runner labels +# 3. platforms.cuda.container_images["flagscale/inference"] -> Docker image +# 4. 
platforms.cuda.container_options -> Docker runtime flags + +# ============================================================================= +# Platforms: each vendor/chip family is a platform +# ============================================================================= +platforms: + + # --------------------------------------------------------------------------- + # NVIDIA CUDA platform + # --------------------------------------------------------------------------- + cuda: + description: "NVIDIA CUDA GPUs (A100, H100, H800, etc.)" + + # Device type -> self-hosted runner labels + device_labels: + A100-40GB: [self-hosted, Linux, X64, gpu-a100-40gb] + A100-80GB: [self-hosted, Linux, X64, gpu-a100-80gb] + H100-80GB: [self-hosted, Linux, X64, gpu-h100-80gb] + H800-80GB: [self-hosted, Linux, X64, gpu-h800-80gb] + + # Default runner labels when device type not found + default_labels: [self-hosted, Linux, X64] + + # Container images: "/" -> Docker image + container_images: + flagscale/train: "localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721" + flagscale/inference: "localhost:5000/flagscale-inference:dev-cu128-py3.12-20260302102033" + flagscale/hetero_train: "localhost:5000/flagscale-train:dev-cu128-py3.12-20260228210721" + + # Conda environment to activate inside the container before test execution. 
+ # Same key format as container_images: "/" | "" | "default" + conda_env: + flagscale/inference: "flagscale-inference" + + # Container runtime options + container_options: "--gpus all --shm-size=500g --user root --ulimit nofile=65535:65535" + + # Container volume mounts (host:container) + container_volumes: + - /home/flagscale_cicd/docker/docker_build/docker_data:/home/gitlab-runner/data + - /home/flagscale_cicd/docker/docker_build/docker_tokenizers:/home/gitlab-runner/tokenizers + + # --------------------------------------------------------------------------- + # MetaX (Muxi) platform — placeholder for future integration + # --------------------------------------------------------------------------- + metax: + description: "MetaX (Muxi) GPUs (C500, etc.)" + + device_labels: + C500: [self-hosted, Linux, X64, metax-c500] + + default_labels: [self-hosted, Linux, X64, metax] + + container_images: {} + # flagscale/train: "registry.example.com/flagscale-train:metax-..." + # flagscale/inference: "registry.example.com/flagscale-inference:metax-..." + + container_options: "--device /dev/mxgpu_all --shm-size=500g --user root" + + container_volumes: [] + + # --------------------------------------------------------------------------- + # Ascend (Huawei) platform — placeholder for future integration + # --------------------------------------------------------------------------- + ascend: + description: "Huawei Ascend NPUs (910B, 910C, etc.)" + + device_labels: + Ascend910B: [self-hosted, Linux, aarch64, ascend-910b] + Ascend910C: [self-hosted, Linux, aarch64, ascend-910c] + + default_labels: [self-hosted, Linux, aarch64, ascend] + + container_images: {} + # flagscale/train: "registry.example.com/flagscale-train:ascend-..." 
+ + container_options: "--device /dev/davinci_all --shm-size=500g --user root" + + container_volumes: [] + +# ============================================================================= +# Global defaults +# ============================================================================= + +# Default platform when resources.platform is not specified +default_platform: cuda + +# Fallback runner labels when nothing matches +default_labels: [self-hosted] diff --git a/flagos-user-tests/tests/flagcx/.gitkeep b/flagos-user-tests/tests/flagcx/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flaggems/.gitkeep b/flagos-user-tests/tests/flaggems/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flagscale/hetero_train/.gitkeep b/flagos-user-tests/tests/flagscale/hetero_train/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore new file mode 100644 index 0000000..2301c87 --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/.gitignore @@ -0,0 +1,2 @@ +FlagScale/ +outputs/ diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md new file mode 100644 index 0000000..491ebcd --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/README.md @@ -0,0 +1,25 @@ +# demo_0_6b + +## Description + +FlagScale inference demo using Qwen3-0.6B model with vLLM backend. +Runs 4 prompts with greedy decoding (temperature=0, max_tokens=10) and verifies output text against gold values. + +## Environment + +- GPU: 1x A100 40GB +- CUDA: 12.1+ +- Python: 3.12 +- vLLM: 0.10.1.dev + +## How to Run + +```bash +git clone https://github.com/flagos-ai/FlagScale.git && cd FlagScale && pip install . 
+flagscale inference qwen3 --config ./conf/demo_0_6b.yaml +``` + +## Gold Values + +Uses text-type gold values to verify inference output. +Greedy decoding (temperature=0) produces deterministic output, so text comparison is exact match. diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml new file mode 100644 index 0000000..0f15416 --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/demo_0_6b.yaml @@ -0,0 +1,27 @@ +defaults: + - _self_ + - inference: demo_0_6b + +experiment: + exp_name: qwen3 + exp_dir: ./outputs/${experiment.exp_name} + task: + type: inference + backend: vllm + entrypoint: flagscale/inference/inference_llm.py + runner: + hostfile: null + cmds: + before_start: source /root/miniconda3/bin/activate flagscale-inference + envs: + VLLM_PLUGINS: "fl" + VLLM_USE_FLASHINFER_SAMPLER: 0 + VLLM_LOGGING_LEVEL: "INFO" + CUDA_VISIBLE_DEVICES: 0 + CUDA_DEVICE_MAX_CONNECTIONS: 1 + +action: run + +hydra: + run: + dir: ${experiment.exp_dir}/hydra diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml new file mode 100644 index 0000000..f1ce909 --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/conf/inference/demo_0_6b.yaml @@ -0,0 +1,18 @@ +llm: + model: /home/gitlab-runner/data/Qwen3-0.6B + trust_remote_code: true + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + gpu_memory_utilization: 0.9 + seed: 1234 + +generate: + prompts: [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + sampling: + max_tokens: 10 + temperature: 0.0 diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml 
b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml new file mode 100644 index 0000000..1399352 --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml @@ -0,0 +1,30 @@ +meta: + repo: flagscale + task: inference + model: qwen3 + case: demo_0_6b + description: > + Qwen3-0.6B inference demo using vLLM backend with FlagScale CLI. + Runs 4 prompts with greedy decoding (temperature=0) and verifies output text. + +resources: + platform: cuda + device: A100-40GB + device_count: 1 + +env: + CUDA_VISIBLE_DEVICES: "0" + VLLM_PLUGINS: "fl" + VLLM_USE_FLASHINFER_SAMPLER: "0" + VLLM_LOGGING_LEVEL: "INFO" + CUDA_DEVICE_MAX_CONNECTIONS: "1" + +setup: + - git clone https://github.com/flagos-ai/FlagScale.git && cd FlagScale && pip install . + +run: + - flagscale inference qwen3 --config ./conf/demo_0_6b.yaml --test + +verify: + log_path: "./outputs/qwen3/inference_logs/host_0_localhost.output" + gold_values_path: ./gold_values/demo_0_6b.json diff --git a/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json new file mode 100644 index 0000000..2a28edd --- /dev/null +++ b/flagos-user-tests/tests/flagscale/inference/qwen3/demo_0_6b/gold_values/demo_0_6b.json @@ -0,0 +1,12 @@ +{ + "inference_output": { + "type": "text", + "pattern": "output\\.outputs\\[0\\]\\.text=(?:\"(.+?)\"$|'(.+?)'$)", + "values": [ + " Lina. I'm a 22-year", + " the same as the president of the United Nations.", + " Paris. 
The capital of France is also the capital", + " not just a technological challenge but a profound transformation of" + ] + } +} diff --git a/flagos-user-tests/tests/flagscale/train/.gitkeep b/flagos-user-tests/tests/flagscale/train/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/flagtree/.gitkeep b/flagos-user-tests/tests/flagtree/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/megatron-lm-fl/.gitkeep b/flagos-user-tests/tests/megatron-lm-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/te-fl/.gitkeep b/flagos-user-tests/tests/te-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/vllm-fl/.gitkeep b/flagos-user-tests/tests/vllm-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tests/vllm-plugin-fl/.gitkeep b/flagos-user-tests/tests/vllm-plugin-fl/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/flagos-user-tests/tools/activate_conda.sh b/flagos-user-tests/tools/activate_conda.sh new file mode 100755 index 0000000..a8e11bd --- /dev/null +++ b/flagos-user-tests/tools/activate_conda.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Activate a conda environment inside a container. +# +# Detects conda installation, initializes the shell, then activates the env. 
+# Must be sourced (not executed) so the activation persists in the caller's shell: +# source tools/activate_conda.sh [conda_path] +# +# Arguments: +# env_name — conda environment name (required) +# conda_path — path to conda installation (optional, auto-detected if omitted) + +set -e + +_activate_conda() { + local env_name="${1:?Usage: source activate_conda.sh [conda_path]}" + local conda_path="${2:-}" + + # Auto-detect conda path if not provided + if [ -z "$conda_path" ]; then + if [ -n "$CONDA_DIR" ] && [ -d "$CONDA_DIR" ]; then + conda_path="$CONDA_DIR" + elif command -v conda &>/dev/null; then + conda_path="$(conda info --base 2>/dev/null)" + elif [ -d "$HOME/miniconda3" ]; then + conda_path="$HOME/miniconda3" + elif [ -d "$HOME/anaconda3" ]; then + conda_path="$HOME/anaconda3" + elif [ -d "/opt/conda" ]; then + conda_path="/opt/conda" + fi + fi + + if [ -z "$conda_path" ]; then + echo "[activate_conda] WARNING: conda not found, skipping activation" + return 0 + fi + + local conda_sh="$conda_path/etc/profile.d/conda.sh" + if [ ! -f "$conda_sh" ]; then + echo "[activate_conda] ERROR: conda.sh not found at $conda_sh" + return 1 + fi + + # Initialize conda for this shell + echo "[activate_conda] Initializing conda from $conda_path" + source "$conda_sh" + + # Activate the environment + echo "[activate_conda] Activating environment: $env_name" + conda activate "$env_name" || { + echo "[activate_conda] ERROR: Failed to activate conda env '$env_name'" + return 1 + } + + echo "[activate_conda] Active Python: $(which python) ($(python --version 2>&1))" +} + +_activate_conda "$@" diff --git a/flagos-user-tests/tools/collect_test_cases.py b/flagos-user-tests/tools/collect_test_cases.py new file mode 100644 index 0000000..e349a38 --- /dev/null +++ b/flagos-user-tests/tools/collect_test_cases.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 +"""Collect all test cases and output a JSON report for post-benchmark-report action. 
+ +Output format (object-of-objects, keyed by case_id): + { + "tests/flagscale/inference/qwen3/demo_0_6b/demo_0_6b.yaml": { + "case_name": "flagscale-inference-qwen3-demo_0_6b", + "repo": "flagscale", + "updated_at": "2026-03-18T15:02:29+08:00" + }, + ... + } + +Usage: + python tools/collect_test_cases.py --root . --output report.json +""" + +import argparse +import json +import subprocess +import sys +from datetime import datetime, timezone +from pathlib import Path + +import yaml + + +def get_file_updated_time(filepath: Path) -> str: + """Get the last commit time of a file via git, fallback to mtime.""" + try: + result = subprocess.run( + ["git", "log", "-1", "--format=%aI", str(filepath)], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0 and result.stdout.strip(): + return result.stdout.strip() + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + # Fallback to file modification time + mtime = filepath.stat().st_mtime + return datetime.fromtimestamp(mtime, tz=timezone.utc).strftime("%Y/%m/%d %H:%M:%S") + + +def make_case_id(meta: dict) -> str: + """Generate a case ID from meta fields: ---.""" + parts = [ + meta.get("repo", "unknown"), + meta.get("task", ""), + meta.get("model", ""), + meta.get("case", ""), + ] + return "-".join(p for p in parts if p) + + +def collect_test_cases(root: Path) -> dict: + """Discover all test cases and return report dict keyed by case_id. 
+ + The post-benchmark-report action expects an object-of-objects format where: + - Each key maps to header_config[0].field (case_id) + - Each value is an object with fields matching header_config[1+] + """ + tests_dir = root / "tests" + report = {} + + for yaml_path in sorted(tests_dir.rglob("*.yaml")): + if yaml_path.name.startswith("_") or yaml_path.name == "data.yaml": + continue + + try: + with open(yaml_path) as f: + data = yaml.safe_load(f) + if not isinstance(data, dict) or "meta" not in data: + continue + + meta = data["meta"] + case_id = str(yaml_path.relative_to(root)) + report[case_id] = { + "case_name": make_case_id(meta), + "repo": meta.get("repo", "unknown"), + "updated_at": get_file_updated_time(yaml_path), + } + except (yaml.YAMLError, KeyError): + continue + + return report + + +def main(): + parser = argparse.ArgumentParser(description="Collect test cases for reporting") + parser.add_argument("--root", default=".", help="Root directory of flagos-user-tests") + parser.add_argument("--output", default="test_cases_report.json", help="Output JSON file") + args = parser.parse_args() + + root = Path(args.root) + report = collect_test_cases(root) + + with open(args.output, "w") as f: + json.dump(report, f, ensure_ascii=False, indent=2) + + print(f"Collected {len(report)} test case(s) -> {args.output}") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/generators/create_test_template.py b/flagos-user-tests/tools/generators/create_test_template.py new file mode 100644 index 0000000..e8e261a --- /dev/null +++ b/flagos-user-tests/tools/generators/create_test_template.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +"""Generate user-perspective test case template. 
+ +Usage: + # FlagScale training test + python create_test_template.py --repo flagscale --type train --model llama2 --name tp2_pp1 + + # Generic test + python create_test_template.py --repo flaggems --name my_operator_test +""" + +import argparse +import json +import os +import sys +from pathlib import Path + +import yaml + + +VALID_REPOS = [ + "flagscale", "flaggems", "flagcx", "flagtree", + "vllm-fl", "vllm-plugin-fl", "te-fl", "megatron-lm-fl", +] + + +def create_flagscale_test_case(task_type: str, model: str, name: str) -> dict: + """Generate a FlagScale user-perspective test case YAML.""" + return { + "meta": { + "repo": "flagscale", + "task": task_type, + "model": model, + "case": name, + "description": "TODO: describe what this test validates", + }, + "resources": { + "gpu": "A100-80GB", + "gpu_count": 8, + }, + "env": { + "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", + "CUDA_DEVICE_MAX_CONNECTIONS": "1", + }, + "setup": [ + "pip install flagscale", + ], + "run": [ + f"flagscale {task_type} {model} --config ./conf/{name}.yaml", + ], + "verify": { + "log_path": f"tests/functional_tests/{task_type}/{model}/test_results/{name}/logs/details/host_0_localhost/*/default_*/attempt_0/*/stdout.log", + "gold_values_path": f"./gold_values/{name}.json", + "rtol": 1e-5, + "atol": 0, + }, + } + + +def create_flagscale_experiment_config(model: str, name: str, task_type: str) -> dict: + """Generate Hydra experiment config for flagscale CLI.""" + return { + "defaults": ["_self_", {task_type: name}], + "experiment": { + "exp_name": name, + "exp_dir": f"tests/functional_tests/{task_type}/{model}/test_results/{name}", + "task": { + "type": task_type, + "backend": "megatron", + "entrypoint": "flagscale/train/megatron/train_gpt.py", + }, + "runner": {"ssh_port": None}, + "envs": { + "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7", + "CUDA_DEVICE_MAX_CONNECTIONS": "1", + }, + }, + "action": "run", + "hydra": {"run": {"dir": "${experiment.exp_dir}/hydra"}}, + } + + +def 
create_flagscale_train_params() -> dict: + """Generate training params sub-config.""" + return { + "defaults": ["data"], + "system": { + "tensor_model_parallel_size": 2, + "pipeline_model_parallel_size": 1, + "sequence_parallel": True, + "use_distributed_optimizer": True, + "precision": {"bf16": True}, + "logging": {"log_interval": 1}, + "checkpoint": {"no_save_optim": True, "no_save_rng": True, "save_interval": 100000}, + }, + "model": { + "num_layers": 2, + "hidden_size": 4096, + "num_attention_heads": 32, + "seq_length": 2048, + }, + } + + +def create_generic_test_case(repo: str, name: str) -> dict: + """Generate a generic user-perspective test case YAML.""" + return { + "meta": { + "repo": repo, + "case": name, + "description": "TODO: describe what this test validates", + }, + "resources": {}, + "setup": [ + f"pip install {repo.replace('-', '_')}", + ], + "run": [ + "pytest -v", + ], + } + + +def create_readme(repo: str, task_type: str, model: str, name: str) -> str: + if repo == "flagscale": + return f"""# {name} + +## Description + +TODO: Describe what this test case validates. + +## Environment + +- GPU: 8x A100 80GB +- CUDA: 12.1+ +- Python: 3.10 + +## How to Run + +```bash +pip install flagscale +flagscale {task_type} {model} --config ./conf/{name}.yaml +``` + +## Gold Values + +TODO: Describe expected values and tolerance. +""" + return f"""# {name} + +## Description + +TODO: Describe what this test case validates. 
+ +## Environment + +- Python: 3.10 + +## How to Run + +```bash +pip install {repo} +pytest -v +``` +""" + + +def dump_yaml(data: dict, path: Path): + os.makedirs(path.parent, exist_ok=True) + with open(path, "w") as f: + yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False) + + +def main(): + parser = argparse.ArgumentParser(description="Generate test case template") + parser.add_argument("--repo", required=True, choices=VALID_REPOS) + parser.add_argument("--type", default="train") + parser.add_argument("--model", default="") + parser.add_argument("--name", required=True) + parser.add_argument("--output", default=".") + args = parser.parse_args() + + root = Path(args.output) + + if args.repo == "flagscale": + if not args.model: + print("FlagScale test cases require --model"); sys.exit(1) + + case_dir = root / "tests" / args.repo / args.type / args.model / args.name + + # Main test case YAML (user-perspective) + tc = create_flagscale_test_case(args.type, args.model, args.name) + dump_yaml(tc, case_dir / f"{args.name}.yaml") + + # Hydra experiment config + ec = create_flagscale_experiment_config(args.model, args.name, args.type) + dump_yaml(ec, case_dir / "conf" / f"{args.name}.yaml") + + # Training params sub-config + tp = create_flagscale_train_params() + dump_yaml(tp, case_dir / "conf" / "train" / f"{args.name}.yaml") + + # Gold values + gold = {"lm loss:": {"values": [0.0] * 10}} + gold_path = case_dir / "gold_values" / f"{args.name}.json" + os.makedirs(gold_path.parent, exist_ok=True) + with open(gold_path, "w") as f: + json.dump(gold, f, indent=2) + + # README + readme = create_readme(args.repo, args.type, args.model, args.name) + with open(case_dir / "README.md", "w") as f: + f.write(readme) + + print(f"Created FlagScale test case at: {case_dir}") + print(f" {args.name}.yaml — test case (setup/run/verify)") + print(f" conf/{args.name}.yaml — FlagScale experiment config") + print(f" conf/train/{args.name}.yaml — training 
parameters") + print(f" gold_values/{args.name}.json — expected metrics") + print(f" README.md") + else: + case_dir = root / "tests" / args.repo / args.name + tc = create_generic_test_case(args.repo, args.name) + dump_yaml(tc, case_dir / f"{args.name}.yaml") + + readme = create_readme(args.repo, "", "", args.name) + with open(case_dir / "README.md", "w") as f: + f.write(readme) + + print(f"Created test case at: {case_dir}") + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/resolve_matrix.py b/flagos-user-tests/tools/resolve_matrix.py new file mode 100644 index 0000000..3d2a549 --- /dev/null +++ b/flagos-user-tests/tools/resolve_matrix.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +"""Resolve test case resources into a GitHub Actions matrix. + +Reads detection outputs (changed_cases / changed_repos / changed_repos_list) +and produces a JSON matrix with runner_labels, container_image, container_options, +and container_volumes per test case entry. + +Usage (from workflow): + python tools/resolve_matrix.py \ + --changed-cases '${{ steps.detect.outputs.changed_cases }}' \ + --changed-repos '${{ steps.detect.outputs.changed_repos }}' \ + --changed-repos-list '${{ steps.detect.outputs.changed_repos_list }}' +""" + +import argparse +import json +import os +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) +from run_user_tests import ( + list_test_resources, + resolve_conda_env, + resolve_container_image, + resolve_container_options, + resolve_runner_labels, +) + +import yaml + + +def make_entry(case_path: str, meta: dict, resources: dict, resource_map_path: Path) -> dict: + """Build a matrix entry with runner labels and per-platform container config.""" + labels = resolve_runner_labels(resources, resource_map_path) + image = resolve_container_image( + meta.get("repo", ""), meta.get("task", ""), + resources, resource_map_path, + ) + init_cmd = resolve_conda_env( + meta.get("repo", ""), meta.get("task", ""), + 
resources, resource_map_path, + ) + opts = resolve_container_options(resources, resource_map_path) + return { + "case_path": case_path, + "repo": meta.get("repo", ""), + "task": meta.get("task", ""), + "model": meta.get("model", ""), + "runner_labels": json.dumps(labels), + "container_image": image, + "conda_env": init_cmd, + "container_options": opts["container_options"], + "container_volumes": json.dumps(opts["container_volumes"]), + } + + +def make_empty_entry(**kwargs) -> dict: + """Build a placeholder entry with defaults.""" + return { + "case_path": "", "repo": "", "task": "", "model": "", + "runner_labels": json.dumps(["self-hosted"]), + "container_image": "", "conda_env": "", + "container_options": "", + "container_volumes": json.dumps([]), + **kwargs, + } + + +def resource_entry_to_matrix(entry: dict, repo: str = "", task: str = "", model: str = "") -> dict: + """Convert a list_test_resources entry to a matrix entry.""" + return { + "case_path": entry["case_path"], + "repo": entry.get("repo", "") or repo or "", + "task": entry.get("task", "") or task or "", + "model": entry.get("model", "") or model or "", + "runner_labels": json.dumps(entry["runner_labels"]), + "container_image": entry.get("container_image", ""), + "conda_env": entry.get("conda_env", ""), + "container_options": entry.get("container_options", ""), + "container_volumes": json.dumps(entry.get("container_volumes", [])), + } + + +def main(): + parser = argparse.ArgumentParser(description="Resolve test resources to CI matrix") + parser.add_argument("--changed-cases", default="") + parser.add_argument("--changed-repos", default="") + parser.add_argument("--changed-repos-list", default="") + parser.add_argument("--root", default=".", help="Root directory of flagos-user-tests") + args = parser.parse_args() + + root = Path(args.root) + resource_map_path = root / "resource_map.yaml" + matrix_entries = [] + + if args.changed_cases: + cases = json.loads(args.changed_cases) + for case_path in cases: + 
p = root / case_path if not Path(case_path).is_absolute() else Path(case_path) + if p.exists(): + data = yaml.safe_load(p.read_text()) + matrix_entries.append(make_entry( + case_path, data.get("meta", {}), + data.get("resources", {}), resource_map_path, + )) + + elif args.changed_repos_list: + repos = json.loads(args.changed_repos_list) + for repo in repos: + for entry in list_test_resources(root, repo=repo): + matrix_entries.append(resource_entry_to_matrix(entry, repo=repo)) + + elif args.changed_repos: + info = json.loads(args.changed_repos) + if info.get("repo") == "_none_": + matrix_entries.append(make_empty_entry(repo="_none_")) + else: + repo = info["repo"] + task = info.get("task", "") or None + model = info.get("model", "") or None + entries = list_test_resources(root, repo=repo, task=task, model=model) + if entries: + for entry in entries: + matrix_entries.append(resource_entry_to_matrix( + entry, repo=repo, + task=info.get("task", ""), + model=info.get("model", ""), + )) + else: + matrix_entries.append(make_empty_entry(repo=repo)) + + matrix = {"include": matrix_entries} + matrix_json = json.dumps(matrix) + print(f"Matrix: {matrix_json}") + + # Write to GITHUB_OUTPUT if available + github_output = os.environ.get("GITHUB_OUTPUT") + if github_output: + with open(github_output, "a") as f: + f.write(f"matrix={matrix_json}\n") + else: + # For local testing, just print to stdout + print(json.dumps(matrix, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/run_user_tests.py b/flagos-user-tests/tools/run_user_tests.py new file mode 100644 index 0000000..754d6a6 --- /dev/null +++ b/flagos-user-tests/tools/run_user_tests.py @@ -0,0 +1,579 @@ +#!/usr/bin/env python3 +"""Run user-submitted test cases against FlagOS repositories. 
+ +Each test case is a self-contained YAML config that defines: + - setup: how to install the repo and dependencies (user's perspective) + - run: how to execute the test (user's perspective) + - verify: how to check results against gold values + +This runner simply executes user-defined commands — it does NOT call +any internal repo test scripts. This keeps test cases at the "user level". + +Usage: + # Run a specific test case + python tools/run_user_tests.py --case tests/flagscale/train/mixtral/tp2_pp1_ep2.yaml + + # Run all test cases for a repo + python tools/run_user_tests.py --repo flagscale + + # Run all test cases for a repo+task+model + python tools/run_user_tests.py --repo flagscale --task train --model mixtral +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import yaml + +# --------------------------------------------------------------------------- +# Gold-value comparison +# --------------------------------------------------------------------------- + +def extract_metrics_from_lines(lines: list[str], metric_keys: list[str]) -> dict: + """Extract numeric metric values from log lines. + + Supports common log formats: + - Pipe-separated: "iteration 1/10 | lm loss: 1.161E+01 | ..." + - Key-value: "step 1 metric_name:1.234" + """ + results = {k: [] for k in metric_keys} + + for line in lines: + for key in metric_keys: + # Pattern: "key " or "key: " + # Handle keys with or without trailing colon + escaped = re.escape(key.rstrip(":")) + pattern = rf"{escaped}\s*:?\s*([+-]?\d+\.?\d*(?:[eE][+-]?\d+)?)" + match = re.search(pattern, line) + if match: + try: + results[key].append(float(match.group(1))) + except ValueError: + pass + + return results + + +def extract_text_from_lines(lines: list[str], pattern: str) -> list[str]: + """Extract text values from log lines using a regex pattern. + + The pattern must contain at least one capture group. If multiple groups + are present (e.g. 
alternation), the first non-None group is used. + Example pattern: r"output\\.outputs\\[0\\]\\.text=(?:\"(.+?)\"|'(.+?)')" + """ + results = [] + compiled = re.compile(pattern) + + for line in lines: + match = compiled.search(line) + if match: + # Pick first non-None group + val = next((g for g in match.groups() if g is not None), None) + if val is not None: + results.append(val) + + return results + + +def compare_gold_values( + actual: dict, gold: dict, rtol: float = 1e-5, atol: float = 0 +) -> tuple[bool, list[str]]: + """Compare actual metrics against gold values. + + Supports two types of gold entries: + - numeric (default): {"values": [1.0, 2.0], "type": "numeric"} + - text: {"values": ["hello", "world"], "type": "text", + "pattern": "regex with (capture group)"} + + Returns (all_passed, list_of_messages). + """ + messages = [] + all_passed = True + + for key, gold_entry in gold.items(): + gold_values = gold_entry.get("values", []) + actual_values = actual.get(key, []) + entry_type = gold_entry.get("type", "numeric") + + if not actual_values: + messages.append(f"FAIL: No values extracted for metric '{key}'") + all_passed = False + continue + + if len(actual_values) != len(gold_values): + messages.append( + f"FAIL: Length mismatch for '{key}': " + f"got {len(actual_values)}, expected {len(gold_values)}" + ) + all_passed = False + continue + + if entry_type == "text": + for i, (a, g) in enumerate(zip(actual_values, gold_values)): + if a != g: + messages.append( + f"FAIL: '{key}'[{i}] text mismatch:\n" + f" actual: {a!r}\n" + f" gold: {g!r}" + ) + all_passed = False + break + else: + messages.append(f"PASS: '{key}' ({len(gold_values)} text values match)") + else: + # numeric comparison — numpy-free allclose + for i, (a, g) in enumerate(zip(actual_values, gold_values)): + if abs(a - g) > atol + rtol * abs(g): + messages.append( + f"FAIL: '{key}'[{i}] mismatch: actual={a}, gold={g}, " + f"diff={abs(a-g):.6e}" + ) + all_passed = False + break + else: + 
messages.append(f"PASS: '{key}' ({len(gold_values)} values match)") + + return all_passed, messages + + +# --------------------------------------------------------------------------- +# Test case execution +# --------------------------------------------------------------------------- + +def run_commands(cmds: list[str], cwd: str, env: dict | None = None) -> int: + """Run a list of shell commands sequentially. Return first non-zero exit code.""" + full_env = {**os.environ, **(env or {})} + for cmd in cmds: + print(f" $ {cmd}") + result = subprocess.run(cmd, shell=True, cwd=cwd, env=full_env) + if result.returncode != 0: + print(f" FAILED (exit code {result.returncode})") + return result.returncode + return 0 + + +def run_test_case(case_path: Path, workdir: Path | None = None) -> int: + """Execute a single user test case. + + Test case YAML format: + meta: + repo: flagscale + task: train + model: mixtral + description: "..." + + resources: + platform: cuda + device: A100-40GB + device_count: 1 + + setup: + - pip install flagscale + - modelscope download --model ... --local_dir ./model_weights + + run: + - flagscale train mixtral --config ./conf/tp2_pp1_ep2.yaml + + verify: + log_path: "tests/functional_tests/train/mixtral/test_results/tp2_pp1_ep2/logs/..." + gold_values_path: "./gold_values/tp2_pp1_ep2.json" + # OR inline gold values: + gold_values: + "lm loss:": + values: [11.17587, 11.16908, ...] 
+ rtol: 1e-5 + atol: 0 + """ + print(f"\n{'='*60}") + print(f"Test Case: {case_path}") + print(f"{'='*60}") + + with open(case_path) as f: + config = yaml.safe_load(f) + + meta = config.get("meta", {}) + setup_cmds = config.get("setup", []) + run_cmds = config.get("run", []) + verify_config = config.get("verify", {}) + + print(f"Repo: {meta.get('repo', 'unknown')}") + print(f"Task: {meta.get('task', 'unknown')}") + print(f"Model: {meta.get('model', 'unknown')}") + print(f"Desc: {meta.get('description', '')}") + print() + + # Determine working directory — test case files live next to the YAML + case_dir = case_path.parent.resolve() + cwd = str(workdir.resolve()) if workdir else str(case_dir) + + env = config.get("env", {}) + # Convert all env values to strings + env = {k: str(v) for k, v in env.items()} + + # --- Setup --- + if setup_cmds: + print("--- Setup ---") + rc = run_commands(setup_cmds, cwd=cwd, env=env) + if rc != 0: + print("SETUP FAILED") + return rc + + # --- Run --- + if run_cmds: + print("\n--- Run ---") + rc = run_commands(run_cmds, cwd=cwd, env=env) + if rc != 0: + print("RUN FAILED") + return rc + + # --- Verify --- + if verify_config: + print("\n--- Verify ---") + return verify_results(verify_config, case_dir=case_dir, cwd=cwd) + + print("\nPASSED (no verify step)") + return 0 + + +def verify_results(verify_config: dict, case_dir: Path, cwd: str) -> int: + """Verify test results against gold values.""" + # Load gold values + gold = verify_config.get("gold_values") + if not gold: + gold_path = verify_config.get("gold_values_path", "") + if gold_path: + # Resolve relative to case_dir + full_path = (case_dir / gold_path) if not Path(gold_path).is_absolute() else Path(gold_path) + if not full_path.exists(): + # Also try relative to cwd + full_path = Path(cwd) / gold_path + if not full_path.exists(): + print(f"FAIL: Gold values file not found: {gold_path}") + return 1 + with open(full_path) as f: + gold = json.load(f) + else: + print("No gold values 
defined, skipping verification") + return 0 + + # Extract actual metrics from log + log_path = verify_config.get("log_path", "") + if not log_path: + print("FAIL: verify.log_path is required for gold value comparison") + return 1 + + # Resolve log path — try relative to cwd first, then case_dir + full_log = Path(cwd) / log_path + if not full_log.exists(): + full_log = case_dir / log_path + if not full_log.exists(): + # Try glob pattern (user might use * for timestamp dirs) + import glob as globmod + candidates = globmod.glob(str(Path(cwd) / log_path)) + if not candidates: + candidates = globmod.glob(str(case_dir / log_path)) + if candidates: + full_log = Path(sorted(candidates)[-1]) # latest match + else: + print(f"FAIL: Log file not found: {log_path}") + return 1 + + print(f"Log: {full_log}") + + # Read log via subprocess to bypass NFS client cache + import time + time.sleep(2) + log_content = subprocess.run( + ["cat", str(full_log)], capture_output=True, text=True + ).stdout + log_lines = log_content.splitlines() + + # Separate numeric and text gold entries + numeric_keys = [] + actual = {} + for key, entry in gold.items(): + entry_type = entry.get("type", "numeric") + if entry_type == "text": + pattern = entry.get("pattern", "") + if not pattern: + print(f"FAIL: Text gold entry '{key}' requires a 'pattern' field") + return 1 + actual[key] = extract_text_from_lines(log_lines, pattern) + else: + numeric_keys.append(key) + + if numeric_keys: + numeric_actual = extract_metrics_from_lines(log_lines, numeric_keys) + actual.update(numeric_actual) + + rtol = verify_config.get("rtol", 1e-5) + atol = verify_config.get("atol", 0) + passed, messages = compare_gold_values(actual, gold, rtol=rtol, atol=atol) + + for msg in messages: + print(f" {msg}") + + print(f"\nResult: {'PASSED' if passed else 'FAILED'}") + return 0 if passed else 1 + + +# --------------------------------------------------------------------------- +# Discovery and batch execution +# 
--------------------------------------------------------------------------- + +def discover_test_cases( + root: Path, repo: str | None = None, + task: str | None = None, model: str | None = None +) -> list[Path]: + """Find all test case YAML files under tests/. + + Test case YAMLs are identified by having a 'meta' key with 'repo'. + """ + tests_dir = root / "tests" + cases = [] + + for yaml_path in sorted(tests_dir.rglob("*.yaml")): + # Skip files in sub-config dirs (train/, data.yaml, etc.) + if yaml_path.name.startswith("_") or yaml_path.name == "data.yaml": + continue + + try: + with open(yaml_path) as f: + data = yaml.safe_load(f) + if not isinstance(data, dict) or "meta" not in data: + continue + meta = data["meta"] + if repo and meta.get("repo") != repo: + continue + if task and meta.get("task") != task: + continue + if model and meta.get("model") != model: + continue + cases.append(yaml_path) + except (yaml.YAMLError, KeyError): + continue + + return cases + + +def _load_resource_map(resource_map_path: Path) -> dict: + """Load resource_map.yaml, returning empty dict on failure.""" + if not resource_map_path.exists(): + return {} + with open(resource_map_path) as f: + return yaml.safe_load(f) or {} + + +def _get_platform_config(resource_map: dict, platform: str) -> dict: + """Get platform config from resource_map, with fallback to default_platform.""" + platforms = resource_map.get("platforms", {}) + if platform and platform in platforms: + return platforms[platform] + default_platform = resource_map.get("default_platform", "") + if default_platform and default_platform in platforms: + return platforms[default_platform] + return {} + + +def resolve_runner_labels(resources: dict, resource_map_path: Path) -> list[str]: + """Resolve test case resources to GitHub Actions runner labels. + + Uses platform-based lookup: + resources.platform -> platforms..device_labels[resources.device] + + Falls back to platform default_labels, then global default_labels. 
+ """ + global_default = ["self-hosted"] + resource_map = _load_resource_map(resource_map_path) + if not resource_map: + return global_default + + global_default = resource_map.get("default_labels", global_default) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + if not pcfg: + return global_default + + platform_default = pcfg.get("default_labels", global_default) + device = resources.get("device", "") + if not device: + return platform_default + + # Case-insensitive device lookup + device_labels = pcfg.get("device_labels", {}) + for key, labels in device_labels.items(): + if key.lower() == device.lower(): + return labels + + return platform_default + + +def resolve_container_image( + repo: str, task: str, resources: dict, resource_map_path: Path +) -> str: + """Resolve test case to a Docker container image. + + Lookup: platform -> container_images -> "/" | "" | "default" + Returns "" if no image is configured. + """ + resource_map = _load_resource_map(resource_map_path) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + images = pcfg.get("container_images", {}) + if not images: + return "" + + key = f"{repo}/{task}" if task else repo + image = images.get(key, "") + if not image and repo: + image = images.get(repo, "") + if not image: + image = images.get("default", "") + return image + + +def resolve_container_options(resources: dict, resource_map_path: Path) -> dict: + """Resolve container runtime options and volumes for the given platform. + + Returns {"container_options": str, "container_volumes": list}. 
+ """ + resource_map = _load_resource_map(resource_map_path) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + return { + "container_options": pcfg.get("container_options", ""), + "container_volumes": pcfg.get("container_volumes", []), + } + + +def resolve_conda_env( + repo: str, task: str, resources: dict, resource_map_path: Path +) -> str: + """Resolve conda environment name for the given platform and repo/task. + + Lookup: platform -> conda_env -> "/" | "" | "default" + Returns "" if no conda env is configured. + """ + resource_map = _load_resource_map(resource_map_path) + platform = resources.get("platform", "") + pcfg = _get_platform_config(resource_map, platform) + conda_envs = pcfg.get("conda_env", {}) + if not conda_envs: + return "" + + key = f"{repo}/{task}" if task else repo + env = conda_envs.get(key, "") + if not env and repo: + env = conda_envs.get(repo, "") + if not env: + env = conda_envs.get("default", "") + return env + + +def list_test_resources( + root: Path, repo: str | None = None, + task: str | None = None, model: str | None = None +) -> list[dict]: + """List test cases with their resource requirements, runner labels, and container config. 
+ + Returns a list of dicts with keys: + case_path, resources, runner_labels, container_image, container_init, + container_options, container_volumes + """ + cases = discover_test_cases(root, repo, task, model) + resource_map_path = root / "resource_map.yaml" + result = [] + + for case_path in cases: + with open(case_path) as f: + data = yaml.safe_load(f) + meta = data.get("meta", {}) + resources = data.get("resources", {}) + runner_labels = resolve_runner_labels(resources, resource_map_path) + container_image = resolve_container_image( + meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path + ) + conda_env = resolve_conda_env( + meta.get("repo", ""), meta.get("task", ""), resources, resource_map_path + ) + container_opts = resolve_container_options(resources, resource_map_path) + result.append({ + "case_path": str(case_path), + "repo": meta.get("repo", ""), + "task": meta.get("task", ""), + "model": meta.get("model", ""), + "resources": resources, + "runner_labels": runner_labels, + "container_image": container_image, + "conda_env": conda_env, + **container_opts, + }) + + return result + + +def main(): + parser = argparse.ArgumentParser( + description="Run user-submitted FlagOS test cases" + ) + parser.add_argument("--case", help="Path to a specific test case YAML") + parser.add_argument("--repo", help="Run all cases for this repo") + parser.add_argument("--task", help="Filter by task type") + parser.add_argument("--model", help="Filter by model name") + parser.add_argument( + "--workdir", + help="Working directory for command execution (default: test case directory)" + ) + parser.add_argument( + "--list-resources", action="store_true", + help="List test cases with resource requirements and runner labels (JSON output)" + ) + args = parser.parse_args() + + # --list-resources mode: output JSON and exit + if args.list_resources: + root = Path(".") + result = list_test_resources(root, args.repo, args.task, args.model) + print(json.dumps(result, 
indent=2)) + sys.exit(0) + + workdir = Path(args.workdir) if args.workdir else None + + if args.case: + case_path = Path(args.case) + if not case_path.exists(): + print(f"ERROR: Test case not found: {case_path}") + sys.exit(1) + sys.exit(run_test_case(case_path, workdir)) + + if not args.repo: + print("ERROR: Specify --case, --repo, or --list-resources") + sys.exit(1) + + root = Path(".") + cases = discover_test_cases(root, args.repo, args.task, args.model) + + if not cases: + print(f"No test cases found for repo={args.repo} task={args.task} model={args.model}") + sys.exit(0) + + print(f"Found {len(cases)} test case(s)") + failed = 0 + for case in cases: + rc = run_test_case(case, workdir) + if rc != 0: + failed += 1 + + print(f"\n{'='*60}") + print(f"Results: {len(cases) - failed}/{len(cases)} passed") + print(f"{'='*60}") + sys.exit(1 if failed else 0) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/test_post_report.sh b/flagos-user-tests/tools/test_post_report.sh new file mode 100755 index 0000000..2783e36 --- /dev/null +++ b/flagos-user-tests/tools/test_post_report.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# Local test script for posting test cases report. +# +# Usage: +# ./tools/test_post_report.sh [api_token] +# +# Example: +# ./tools/test_post_report.sh http://10.0.0.1:8080 +# ./tools/test_post_report.sh http://10.0.0.1:8080 my-secret-token + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +BACKEND_URL="${1:?Usage: $0 [api_token]}" +BACKEND_URL="${BACKEND_URL%/}" +API_TOKEN="${2:-}" + +LIST_CODE="flagops-user-test-cases" +LIST_NAME="FlagOps User Test Cases" +REPORT_PATH="$ROOT_DIR/test_cases_report.json" + +HEADER_CONFIG='[ + {"field": "case_id", "name": "用例ID", "required": true, "sortable": true, "type": "string"}, + {"field": "case_name", "name": "用例名称", "required": true, "sortable": false, "type": "string"}, + {"field": "repo", "name": "所属子仓库", "required": true, "sortable": true, "type": "string"}, + {"field": "updated_at", "name": "更新时间", "required": true, "sortable": true, "type": "string"} +]' + +# --- Step 1: Collect test cases --- +echo "=== Step 1: Collect test cases ===" +cd "$ROOT_DIR" +python tools/collect_test_cases.py --root . --output "$REPORT_PATH" +echo "Report content:" +cat "$REPORT_PATH" | python -m json.tool +echo "" + +# --- Step 2: Post header config --- +echo "=== Step 2: Post header config ===" +HEADER_PAYLOAD=$(jq -n \ + --arg list_code "$LIST_CODE" \ + --arg list_name "$LIST_NAME" \ + --argjson header_config "$HEADER_CONFIG" \ + '{list_code: $list_code, list_name: $list_name, header_config: $header_config}') + +echo "URL: ${BACKEND_URL}/flagcicd-backend/list/header" +echo "Payload:" +echo "$HEADER_PAYLOAD" | jq . 
+ +CURL_ARGS=(-s -X POST -w '\n%{http_code}' -H "Content-Type: application/json" -d "$HEADER_PAYLOAD") +[ -n "$API_TOKEN" ] && CURL_ARGS+=(-H "Authorization: Bearer $API_TOKEN") + +RESPONSE=$(curl "${CURL_ARGS[@]}" "${BACKEND_URL}/flagcicd-backend/list/header") +HTTP_STATUS=$(echo "$RESPONSE" | tail -n1) +RESPONSE_BODY=$(echo "$RESPONSE" | sed '$d') + +echo "HTTP status: $HTTP_STATUS" +echo "Response: $RESPONSE_BODY" +echo "" + +# --- Step 3: Post list data --- +echo "=== Step 3: Post list data ===" +COMMIT_ID="$(git rev-parse HEAD 2>/dev/null || echo 'unknown')" +REPO_NAME="flagos-ai/FlagOps" +WORKFLOW_ID="local-test" +RUN_ID="local-$$" + +DATA_PAYLOAD=$(jq -n \ + --arg repository_name "$REPO_NAME" \ + --arg workflow_id "$WORKFLOW_ID" \ + --arg commit_id "$COMMIT_ID" \ + --arg run_id "$RUN_ID" \ + --argjson header_config "$HEADER_CONFIG" \ + --slurpfile report "$REPORT_PATH" \ + '{ + items: [ $report[0] | to_entries[] | . as $entry | + ([ $header_config | to_entries[] | .value.field as $f | + if .key == 0 then {($f): $entry.key} + else {($f): $entry.value[$f]} + end + ] | add) + { + commit_id: $commit_id, + repository_name: $repository_name, + workflow_id: $workflow_id, + run_id: $run_id + } + ] + }') + +echo "URL: ${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}" +echo "Items count: $(echo "$DATA_PAYLOAD" | jq '.items | length')" +echo "Payload (first item sample):" +echo "$DATA_PAYLOAD" | jq '{items_count: (.items | length), first_item: .items[0]}' + +CURL_ARGS=(-s -X POST -w '\n%{http_code}' -H "Content-Type: application/json" -d "$DATA_PAYLOAD") +[ -n "$API_TOKEN" ] && CURL_ARGS+=(-H "Authorization: Bearer $API_TOKEN") + +RESPONSE=$(curl "${CURL_ARGS[@]}" "${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}") +HTTP_STATUS=$(echo "$RESPONSE" | tail -n1) +RESPONSE_BODY=$(echo "$RESPONSE" | sed '$d') + +echo "HTTP status: $HTTP_STATUS" +echo "Response: $RESPONSE_BODY" +echo "" + +# --- Step 4: Query to verify --- +echo "=== Step 4: Query list data ===" 
+QUERY_URL="${BACKEND_URL}/flagcicd-backend/list/data/${LIST_CODE}?page_size=10&page=1&sort=created_at&order=desc" +echo "URL: $QUERY_URL" + +CURL_ARGS=(-s -X GET -w '\n%{http_code}' -H "Accept: application/json") +[ -n "$API_TOKEN" ] && CURL_ARGS+=(-H "Authorization: Bearer $API_TOKEN") + +RESPONSE=$(curl "${CURL_ARGS[@]}" "$QUERY_URL") +HTTP_STATUS=$(echo "$RESPONSE" | tail -n1) +RESPONSE_BODY=$(echo "$RESPONSE" | sed '$d') + +echo "HTTP status: $HTTP_STATUS" +echo "Response:" +echo "$RESPONSE_BODY" | jq . 2>/dev/null || echo "$RESPONSE_BODY" + +# Cleanup +rm -f "$REPORT_PATH" +echo "" +echo "=== Done ===" diff --git a/flagos-user-tests/tools/validators/lint_test_case.py b/flagos-user-tests/tools/validators/lint_test_case.py new file mode 100644 index 0000000..e171dde --- /dev/null +++ b/flagos-user-tests/tools/validators/lint_test_case.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +"""Lint test case directories for completeness and correctness. + +Checks: +- Each test case directory has a README.md +- Each test case has at least one YAML config +- README contains required sections (Description, Environment, etc.) 
+- No sensitive data patterns (tokens, passwords, private paths) +""" + +import argparse +import re +import sys +from pathlib import Path + +import yaml + + +VALID_REPOS = [ + "flagscale", "flaggems", "flagcx", "flagtree", + "vllm-fl", "vllm-plugin-fl", "te-fl", "megatron-lm-fl", +] + +# Patterns that might indicate sensitive data in configs +SENSITIVE_PATTERNS = [ + re.compile(r"(password|passwd|secret|token|api_key)\s*[:=]", re.IGNORECASE), + re.compile(r"/home/[a-zA-Z0-9_]+/", re.IGNORECASE), # Private user paths + re.compile(r"sk-[a-zA-Z0-9]{20,}"), # API keys +] + +README_REQUIRED_SECTIONS = ["description", "environment"] + + +def find_test_case_dirs(root: Path) -> list[Path]: + """Find directories that contain a user-perspective test case YAML (has 'meta' key).""" + tests_dir = root / "tests" + if not tests_dir.exists(): + return [] + + test_dirs = set() + for yaml_file in tests_dir.rglob("*.yaml"): + try: + data = yaml.safe_load(yaml_file.read_text()) + if isinstance(data, dict) and "meta" in data: + test_dirs.add(yaml_file.parent) + except (yaml.YAMLError, OSError): + continue + + return sorted(test_dirs) + + +def lint_readme(readme_path: Path, strict: bool = False) -> list[str]: + """Check README.md for required content.""" + errors = [] + if not readme_path.exists(): + return [f"{readme_path.parent}: Missing README.md"] + + content = readme_path.read_text().lower() + + if strict: + for section in README_REQUIRED_SECTIONS: + if section not in content: + errors.append( + f"{readme_path}: Missing required section '{section}'" + ) + + if len(content.strip()) < 20: + errors.append(f"{readme_path}: README is too short (less than 20 characters)") + + return errors + + +def lint_sensitive_data(filepath: Path) -> list[str]: + """Check for sensitive data patterns in config files.""" + errors = [] + content = filepath.read_text() + for pattern in SENSITIVE_PATTERNS: + matches = pattern.findall(content) + if matches: + errors.append( + f"{filepath}: Possible 
sensitive data detected: {matches[:3]}" + ) + return errors + + +def lint_yaml_configs(test_dir: Path) -> list[str]: + """Lint YAML config files in a test directory.""" + errors = [] + yaml_files = list(test_dir.glob("*.yaml")) + if not yaml_files: + return [] + + for yf in yaml_files: + try: + with open(yf) as f: + data = yaml.safe_load(f) + if data is None: + errors.append(f"{yf}: Empty YAML file") + except yaml.YAMLError as e: + errors.append(f"{yf}: Invalid YAML - {e}") + continue + + # Check for sensitive data + errors.extend(lint_sensitive_data(yf)) + + return errors + + +def main(): + parser = argparse.ArgumentParser(description="Lint test case directories") + parser.add_argument( + "--path", default=".", + help="Root directory of flagos-user-tests" + ) + parser.add_argument( + "--strict", action="store_true", + help="Enable strict checks (README sections, etc.)" + ) + args = parser.parse_args() + + root = Path(args.path) + all_errors = [] + warnings = [] + + test_dirs = find_test_case_dirs(root) + if not test_dirs: + print("No test case directories found.") + sys.exit(0) + + for test_dir in test_dirs: + # Check README + readme_errors = lint_readme(test_dir / "README.md", strict=args.strict) + if args.strict: + all_errors.extend(readme_errors) + else: + warnings.extend(readme_errors) + + # Lint YAML configs + all_errors.extend(lint_yaml_configs(test_dir)) + + if warnings: + print(f"Warnings ({len(warnings)}):") + for w in warnings: + print(f" ⚠ {w}") + + if all_errors: + print(f"Lint FAILED with {len(all_errors)} error(s):") + for err in all_errors: + print(f" ✗ {err}") + sys.exit(1) + else: + print(f"Lint PASSED: {len(test_dirs)} test directory(ies) checked.") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/flagos-user-tests/tools/validators/validate_config.py b/flagos-user-tests/tools/validators/validate_config.py new file mode 100644 index 0000000..10cf0dc --- /dev/null +++ b/flagos-user-tests/tools/validators/validate_config.py @@ 
-0,0 +1,159 @@ +#!/usr/bin/env python3 +"""Validate YAML configuration files for test cases. + +Checks: +- YAML syntax validity +- Test case YAML (with meta key): required fields (meta.repo, setup, run) +- FlagScale sub-configs (experiment/defaults): structure validation +- Generic configs: non-empty dict +""" + +import argparse +import sys +from pathlib import Path + +import yaml + + +VALID_REPOS = [ + "flagscale", "flaggems", "flagcx", "flagtree", + "vllm-fl", "vllm-plugin-fl", "te-fl", "megatron-lm-fl", +] + + +def validate_yaml_syntax(filepath: Path) -> list[str]: + """Check that a file is valid YAML.""" + errors = [] + try: + with open(filepath) as f: + data = yaml.safe_load(f) + if data is None: + errors.append(f"{filepath}: YAML file is empty") + except yaml.YAMLError as e: + errors.append(f"{filepath}: Invalid YAML syntax - {e}") + return errors + + +def validate_test_case(filepath: Path, data: dict) -> list[str]: + """Validate a user-perspective test case YAML (has 'meta' key).""" + errors = [] + meta = data.get("meta", {}) + + if not meta.get("repo"): + errors.append(f"{filepath}: Missing 'meta.repo'") + elif meta["repo"] not in VALID_REPOS: + errors.append(f"{filepath}: Invalid meta.repo '{meta['repo']}'") + + if not data.get("run"): + errors.append(f"{filepath}: Missing 'run' (list of commands)") + elif not isinstance(data["run"], list): + errors.append(f"{filepath}: 'run' must be a list of commands") + + if "setup" in data and not isinstance(data["setup"], list): + errors.append(f"{filepath}: 'setup' must be a list of commands") + + if "verify" in data: + v = data["verify"] + if isinstance(v, dict): + has_gold = v.get("gold_values") or v.get("gold_values_path") + if has_gold and not v.get("log_path"): + errors.append(f"{filepath}: verify.log_path required when gold values are defined") + + return errors + + +def validate_flagscale_subconfig(filepath: Path, data: dict) -> list[str]: + """Validate FlagScale sub-config (experiment config or train 
params)."""
+    errors = []
+    keys = set(data.keys())
+
+    if "experiment" in keys:
+        exp = data["experiment"] if isinstance(data["experiment"], dict) else {}  # empty/scalar body -> {}
+        if "exp_name" not in exp:
+            errors.append(f"{filepath}: Missing 'experiment.exp_name'")
+        if "task" not in exp:
+            errors.append(f"{filepath}: Missing 'experiment.task'")
+        elif not isinstance(exp.get("task"), dict) or "type" not in exp["task"]:  # avoid substring test on str task
+            errors.append(f"{filepath}: Missing 'experiment.task.type'")
+    elif "defaults" in keys:
+        # Sub-config (train params, data, etc.) — lighter validation
+        pass
+    else:
+        errors.append(
+            f"{filepath}: Missing expected top-level key "
+            f"('experiment' or 'defaults'), found: {keys}"
+        )
+    return errors
+
+
+def validate_file(filepath: Path) -> list[str]:
+    """Validate a single YAML file based on its content type."""
+    errors = validate_yaml_syntax(filepath)
+    if errors:
+        return errors
+
+    with open(filepath) as f:
+        data = yaml.safe_load(f)
+    if not isinstance(data, dict):
+        return [f"{filepath}: Must be a YAML mapping"]
+
+    # Determine type by content
+    if "meta" in data:
+        # User-perspective test case
+        return validate_test_case(filepath, data)
+    elif "experiment" in data or "defaults" in data:
+        # FlagScale sub-config (Hydra config)
+        return validate_flagscale_subconfig(filepath, data)
+    else:
+        # Generic config — just check it's a valid non-empty dict
+        return []
+
+
+def find_yaml_files(root: Path) -> list[Path]:
+    """Find all YAML files under tests/."""
+    tests_dir = root / "tests"
+    if not tests_dir.exists():
+        return []
+    return sorted(tests_dir.rglob("*.yaml"))
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Validate test case YAML configs")
+    parser.add_argument("--path", default=".", help="Root directory of flagos-user-tests")
+    parser.add_argument("--changed-files", default="", help="Comma-separated list of changed files")
+    args = parser.parse_args()
+
+    root = Path(args.path)
+
+    if args.changed_files:
+        yaml_files = [
+            Path(f.strip()) for f in args.changed_files.split(",")
+            if f.strip().endswith(".yaml") and f.strip().startswith("tests/")
+        ]
+    else:
+        yaml_files = find_yaml_files(root)
+
+    if not yaml_files:
+        print("No YAML test config files found to validate.")
+        sys.exit(0)
+
+    all_errors = []
+    for filepath in yaml_files:
+        full_path = root / filepath if not filepath.is_absolute() else filepath
+        if not full_path.exists():
+            all_errors.append(f"{filepath}: File does not exist")
+            continue
+        all_errors.extend(validate_file(full_path))
+
+    if all_errors:
+        print(f"Validation FAILED with {len(all_errors)} error(s):")
+        for err in all_errors:
+            print(f"  ✗ {err}")
+        sys.exit(1)
+    else:
+        print(f"Validation PASSED: {len(yaml_files)} file(s) checked.")
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/flagos-user-tests/tools/validators/validate_gold_values.py b/flagos-user-tests/tools/validators/validate_gold_values.py
new file mode 100644
index 0000000..6d690cc
--- /dev/null
+++ b/flagos-user-tests/tools/validators/validate_gold_values.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""Validate gold values JSON files for test cases.
+
+Checks:
+- Valid JSON syntax
+- Expected structure: keys map to objects with "values" arrays
+- At least one value is present
+- Numeric entries (default): all values are int/float
+- Text entries (type: "text"): all values are strings, "pattern" field is present
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+
+def validate_gold_values_file(filepath: Path) -> list[str]:
+    """Validate a single gold values JSON file."""
+    errors = []
+
+    try:
+        with open(filepath) as f:
+            data = json.load(f)
+    except json.JSONDecodeError as e:
+        return [f"{filepath}: Invalid JSON - {e}"]
+
+    if not isinstance(data, dict):
+        return [f"{filepath}: Gold values must be a JSON object, got {type(data).__name__}"]
+
+    if not data:
+        return [f"{filepath}: Gold values file is empty"]
+
+    for key, value in data.items():
+        if not isinstance(value, dict):
+            errors.append(f"{filepath}: Key '{key}' must map to an object, got {type(value).__name__}")
+            continue
+
+        if "values" not in value:
+            errors.append(f"{filepath}: Key '{key}' missing 'values' field")
+            continue
+
+        values = value["values"]
+        if not isinstance(values, list):
+            errors.append(f"{filepath}: Key '{key}'.values must be an array")
+            continue
+
+        if len(values) == 0:
+            errors.append(f"{filepath}: Key '{key}'.values is empty")
+            continue
+
+        entry_type = value.get("type", "numeric")
+
+        if entry_type == "text":
+            # Text entries require a 'pattern' field for extraction
+            if "pattern" not in value:
+                errors.append(f"{filepath}: Key '{key}' has type 'text' but missing 'pattern' field")
+            for i, v in enumerate(values):
+                if not isinstance(v, str):
+                    errors.append(
+                        f"{filepath}: Key '{key}'.values[{i}] is not a string: {v!r}"
+                    )
+        elif entry_type == "numeric":
+            for i, v in enumerate(values):
+                if isinstance(v, bool) or not isinstance(v, (int, float)):  # bool subclasses int; reject it
+                    errors.append(
+                        f"{filepath}: Key '{key}'.values[{i}] is not numeric: {v!r}"
+                    )
+        else:
+            errors.append(f"{filepath}: Key '{key}' has unknown type: {entry_type!r}")
+
+    return errors
+
+
+def find_gold_values_files(root: Path) -> list[Path]:
+    """Find all gold values JSON files under tests/.
+
+    Supports both conventions:
+    - FlagScale: tests/<repo>/<task>/<model>/gold_values/<config>.json
+    - Flat: tests/<repo>/<task>/<model>_gold_values.json
+    """
+    tests_dir = root / "tests"
+    if not tests_dir.exists():
+        return []
+    # Match files inside gold_values/ directories
+    gold_dir_files = list(tests_dir.rglob("gold_values/*.json"))
+    # Match files with _gold_values in name (legacy flat layout)
+    gold_name_files = list(tests_dir.rglob("*_gold_values.json"))
+    return sorted(set(gold_dir_files + gold_name_files))  # sorted: deterministic report order
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Validate gold values JSON files")
+    parser.add_argument(
+        "--path", default=".",
+        help="Root directory of flagos-user-tests"
+    )
+    args = parser.parse_args()
+
+    root = Path(args.path)
+    all_errors = []
+
+    gold_files = find_gold_values_files(root)
+
+    if not gold_files:
+        print("No gold values files found. Skipping validation.")
+        sys.exit(0)
+
+    for filepath in gold_files:
+        all_errors.extend(validate_gold_values_file(filepath))
+
+    if all_errors:
+        print(f"Gold values validation FAILED with {len(all_errors)} error(s):")
+        for err in all_errors:
+            print(f"  ✗ {err}")
+        sys.exit(1)
+    else:
+        print(f"Gold values validation PASSED: {len(gold_files)} file(s) checked.")
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()