From fdd937c0f58612c6a71dad460562c15471a65eef Mon Sep 17 00:00:00 2001 From: le Date: Wed, 29 Apr 2026 13:36:42 +0800 Subject: [PATCH 1/6] enhance public showcase for multi-agent lab --- .github/workflows/public-smoke.yml | 17 ++++ CONTRIBUTING.md | 91 ++++++++++++++++++ LICENSE | 21 +++++ README.md | 145 +++++++++++++++++++++++++++-- ROADMAP.md | 38 ++++++++ docs/architecture.md | 109 ++++++++++++++++++++++ docs/mimo-orbit.md | 84 +++++++++++++++++ docs/security.md | 105 +++++++++++++++++++++ docs/workflows.md | 120 ++++++++++++++++++++++++ examples/architecture-review.md | 143 ++++++++++++++++++++++++++++ examples/info-workflow.md | 96 +++++++++++++++++++ examples/modify-workflow.md | 131 ++++++++++++++++++++++++++ tests/smoke_public.sh | 126 +++++++++++++++++++++++++ 13 files changed, 1217 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/public-smoke.yml create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 ROADMAP.md create mode 100644 docs/architecture.md create mode 100644 docs/mimo-orbit.md create mode 100644 docs/security.md create mode 100644 docs/workflows.md create mode 100644 examples/architecture-review.md create mode 100644 examples/info-workflow.md create mode 100644 examples/modify-workflow.md create mode 100755 tests/smoke_public.sh diff --git a/.github/workflows/public-smoke.yml b/.github/workflows/public-smoke.yml new file mode 100644 index 0000000..feafcba --- /dev/null +++ b/.github/workflows/public-smoke.yml @@ -0,0 +1,17 @@ +name: Public Smoke Test + +on: + push: + branches: [main, public-main, enhance-public-showcase] + pull_request: + branches: [main, public-main, enhance-public-showcase] + +jobs: + smoke: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Run public smoke test + run: bash tests/smoke_public.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..701dfed --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,91 @@ +# 
Contributing to Multi Agent Lab + +## Adding a New Agent + +1. Create a new file in `.multi-agent/agents/your_agent.md` +2. Follow the existing agent template format: + +```markdown +# Your Agent Name + +## Responsibility +One sentence on what this agent does. + +## Inputs +What this agent reads (goals, findings, plans, etc.) + +## Outputs +What this agent produces. + +## Safety Considerations +Any specific safety concerns for this role. +``` + +3. Add the agent to the coordinator's agent list in `config.yaml` +4. Add a corresponding workflow type in `.multi-agent/workflows/` if needed +5. Update `docs/architecture.md` to document the new agent role +6. Run `bash tests/smoke_public.sh` to verify structure + +## Adding a New Workflow + +1. Create a new file in `.multi-agent/workflows/your_workflow.yaml` +2. Define which agents run and in what order: + +```yaml +name: your_workflow +agents: + - router + - scout + - analyst + - guard + - executor + - verifier +stop_conditions: + - guard_denied + - verifier_failed +``` + +3. Update `docs/workflows.md` with the new workflow +4. Add an example in `examples/` if applicable + +## Safety-First Contribution Rules + +- **Never commit secrets** — no API keys, tokens, passwords, or credentials +- **Never commit runtime artifacts** — no `logs/`, `memory/daily/`, `memory/failures/`, `memory/decisions/` +- **Workspace files only** — executor writes must stay in `.multi-agent/workspace/` +- **Test before push** — run `bash tests/smoke_public.sh` before submitting a PR +- **Describe what was implemented** — do not claim aspirational features as implemented + +## Before Opening a PR + +Run the full pre-push checklist: + +```bash +# 1. Smoke test passes +bash tests/smoke_public.sh + +# 2. No forbidden files in git status +git status --short | grep -E 'logs|memory/daily|memory/failures|memory/decisions|workspace/.*\.txt|\.env' + +# 3. 
No accidental secrets +grep -rInE 'api[_-]?key|token|secret|password|credential|BEGIN PRIVATE|sk-' \ + --exclude-dir=.git \ + --exclude="*.md" . + +# 4. All new scripts pass syntax checks +bash -n new_script.sh +python3 -m py_compile new_script.py +``` + +## Repository Hygiene + +This is a public repository. Assume everything you push is visible to the internet. + +- The `.gitignore` covers most secret patterns — extend it if you add new tools +- Do not add `.env` files, `*.pem` keys, or credential caches +- If you accidentally push a secret, treat it as compromised and rotate immediately +- When in doubt, ask before pushing + +## Code of Conduct + +Be respectful and collaborative. This is an educational and experimental project. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b6d4878 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 qian-le + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index fc14aa9..c6aba1a 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,139 @@ A local multi-agent workflow skeleton built around OpenClaw, Hermes, and Claude Code. -This repository contains: -- Agent role prompts -- Workflow definitions -- Runtime scripts -- Adapter documents -- Memory templates -- Safe executor and verifier logic - -Runtime logs, local memories, generated workspace files, and private tool configs are excluded from the public release. +## Project Scope + +This repository contains a **workflow orchestration framework** for multi-agent collaboration on a single machine. The system routes tasks through specialized agents — Router, Scout, Analyst, Hermes Reviewer, Guard, Executor, Verifier, and Memory Manager — to plan, review, and execute work safely within a sandboxed workspace. + +It is designed as a local development and experimentation environment, not a hosted or production system. + +## Core Features + +- **Router** — classifies incoming goals into workflow types +- **Scout** — read-only inspection of workspace and memory +- **Analyst** — plans approach and sub-steps +- **Hermes Reviewer** — fallback review for complex or ambiguous tasks +- **Guard** — mandatory safety gate before any execution +- **Executor** — runs actions via shell, Python, or Claude Code +- **Verifier** — checks executor output matches intent +- **Memory Manager** — reads/writes structured memory templates +- **Safe workspace boundary** — all writes stay under `.multi-agent/workspace/` + +## System Architecture + +``` +User Goal + └─> Main Coordinator + └─> Router (classify task type) + ├─> Scout (inspect) + ├─> Analyst (plan) + ├─> Hermes Reviewer (complex tasks) + ├─> Guard (safety gate) + ├─> Executor (run) + ├─> Verifier (check result) + └─> Memory Manager (record) +``` + +## Supported Workflows + +| Workflow | Trigger | Stops at | +|---|---|---| +| `info` | file inspection, search, read | never executes | +| `analysis` | evaluation, 
comparison, planning | never executes | +| `modify` | create/update files in workspace | Guard or Verifier | +| `debug` | diagnose failures | Guard blocks risky ops | +| `architecture` | design review, dry-run | always a plan only | +| `risky` | sudo, system paths, rm -rf | Guard blocks by default | + +## Quick Start + +```bash +# Detect available runtimes +bash .multi-agent/scripts/detect_tools.sh + +# Dry-run an info workflow +bash .multi-agent/scripts/run_workflow.sh --type info --goal "list all agent files" + +# Dry-run an architecture review +bash .multi-agent/scripts/run_workflow.sh --type architecture --goal "review current agent roles" + +# Run the public smoke test +bash tests/smoke_public.sh +``` + +## Repository Layout + +``` +multi-agent-lab/ +├── .multi-agent/ # Core skeleton +│ ├── README.md # Skeleton overview +│ ├── config.yaml # Workflow routing config +│ ├── agents/ # Agent role definitions +│ ├── adapters/ # Backend adapters (shell, openclaw, hermes, claude_code) +│ ├── workflows/ # Workflow type definitions +│ ├── scripts/ # Runner, verifier, guard check, memory writer +│ ├── memory/ +│ │ ├── templates/ # Daily, decision, failure, lesson templates +│ │ ├── project/ # Project status +│ │ └── lessons/ # Cross-task learnings +│ └── workspace/ # Sandbox for executor writes +├── docs/ # Architecture, security, workflow guides +├── examples/ # Example runs (read-only, no real logs) +├── tests/ # Public smoke test +├── README.md # This file +├── LICENSE # MIT +├── CONTRIBUTING.md +└── ROADMAP.md +``` + +## Safety Model + +The system enforces a **Guard before Executor** policy: + +- Guard evaluates all execution requests against a deny-list +- Forbidden: `sudo`, `rm -rf /`, `chmod -R`, `chown -R`, system paths (`/etc`, `/usr`, `/var`, `/opt`) +- Executor writes are **workspace-only** (`.multi-agent/workspace/`) +- No credentials, tokens, keys, or secrets are ever logged +- All memory writes go to structured templates, not raw logs + +## Why This Matters 
+ +Most agent demos show a single LLM call. This skeleton shows how multiple agents with different responsibilities can collaborate — with a safety gate that actually stops destructive operations, and a verifier that checks whether the executor actually did what was asked. + +This is a **workflow skeleton**, not a deployed product. It is useful for studying agent role separation, testing safety boundaries, and running local automation with review steps. + +## MiMo Orbit Relevance + +MiMo-V2.5's long-context reasoning is relevant to this skeleton in several planned areas: + +- **Analyst planning** — multi-step plans with tool call reasoning +- **Hermes review** — fallback review for ambiguous or high-stakes tasks +- **Verifier reasoning** — checking whether execution output satisfies the original goal +- **Memory summarization** — condensing long session history into reusable lessons +- **Workflow optimization** — learning from past Guard decisions to route more efficiently + +The skeleton currently uses rule-based routing and heuristic Guard checks. MiMo-V2.5 integration is a future enhancement target, not a current implementation. 
+ +## Roadmap + +### Near-term +- Polish public documentation and examples +- Expand smoke tests for all workflow types +- Add more local verifier checks + +### Mid-term +- Integrate OpenClaw runtime agent calls (real subprocess routing) +- Improve Claude Code backend adapter +- Improve Hermes non-interactive reviewer mode +- Add richer memory retrieval (semantic vs keyword) + +### Long-term +- MiMo-V2.5 based long-context planner for Analyst +- Multi-model routing based on task complexity +- Task graph execution with dependency tracking +- Self-evaluation loop for verifier +- Optional web dashboard for workflow monitoring + +## License + +MIT diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..23d87b3 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,38 @@ +# Roadmap + +## Near-term (next 1-2 months) + +- [ ] Polish public README and docs for clarity and accuracy +- [ ] Expand `examples/` with more workflow scenarios (debug, risky, analysis) +- [ ] Add more verifier checks in `verify.py` (file existence, content match, exit code) +- [ ] Improve `smoke_public.sh` to cover all workflow types +- [ ] Add `docs/api.md` documenting adapter interface for new backend developers + +## Mid-term (3-6 months) + +- [ ] Integrate OpenClaw runtime agent dispatch (real `sessions_spawn` routing instead of script simulation) +- [ ] Improve Claude Code backend adapter to support persistent sessions +- [ ] Implement Hermes non-interactive reviewer mode (`hermes chat -Q` with stdin prompt) +- [ ] Add semantic memory retrieval (keyword search → lightweight embedding search) +- [ ] Write integration tests that mock the executor for CI without needing real tool access +- [ ] Add a `docs/testing.md` guide for local testing without secrets + +## Long-term (6-12 months) + +- [ ] **MiMo-V2.5 based Analyst** — long-context planner using MiMo API +- [ ] **MiMo-V2.5 based Verifier** — semantic output verification with reasoning +- [ ] **MiMo-V2.5 based Hermes Reviewer** — 
contextual risk assessment +- [ ] Multi-model routing — route simple tasks to smaller/faster models, complex to MiMo-V2.5 +- [ ] Task graph execution — dependency tracking between multi-step sub-tasks +- [ ] Self-evaluation loop — verifier output feeds back into next Analyst plan +- [ ] Optional web dashboard — visualize workflow state, Guard decisions, and memory summary +- [ ] Publish to a package index (e.g., PyPI) for easier installation + +## Investigating / Backlog + +- [ ] macOS compatibility (shell adapter path separators, process management) +- [ ] Windows compatibility (WSL2 vs native PowerShell adapter) +- [ ] Parallel agent execution for independent Scout + Analyst steps +- [ ] Timeout configuration per workflow type +- [ ] Memory compaction — summarize old `daily.md` records into `lesson.md` +- [ ] Guard override mechanism with justification logging (for advanced users) diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..8d72b9f --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,109 @@ +# System Architecture + +## Overview + +Multi Agent Lab is a workflow skeleton that routes tasks through a chain of specialized agents. Each agent has a single responsibility. The coordinator (Main) orchestrates the pipeline and holds final decision authority. + +The system is designed for local execution on a single machine. It does not involve remote agent hosting or hosted LLM backends in its current form. 
+ +## Agent Roles + +| Agent | Responsibility | Reads | Writes | +|---|---|---|---| +| **Router** | Classifies task type from goal | goal text | workflow type | +| **Scout** | Inspect workspace, memory, files | filesystem, memory | scout findings | +| **Analyst** | Generate sub-step plan | scout findings, memory | plan with steps | +| **Hermes Reviewer** | Fallback review for complex tasks | analyst plan | review notes | +| **Guard** | Safety gate before execution | action, context | APPROVED / DENIED | +| **Executor** | Perform the action | guard approval | stdout, files | +| **Verifier** | Check executor output | executor result, goal | verified / failed | +| **Memory Manager** | Record session to templates | all agent outputs | memory templates | + +## Stage-by-Stage Lifecycle + +``` +User Goal + │ + ▼ +Router.classify(goal) ──► workflow type + │ + ▼ +Scout.inspect() ──► findings (workspace state + memory) + │ + ▼ +Analyst.plan() ──► step list (id / kind / task) + │ + ▼ +Hermes.review() [only if complexity == complex] + │ + ▼ +Guard.evaluate() ──► APPROVED / DENIED / NEEDS_CONFIRMATION + │ + ▼ [APPROVED only] +Executor.run() + │ + ▼ +Verifier.check() + │ + ▼ +MemoryManager.write() +``` + +## Why Guard and Verifier Exist + +Most agent demos skip the step before execution. Guard enforces a checklist: + +- Is this a forbidden command (sudo, rm -rf, system paths)? +- Is this writing outside the workspace boundary? +- Are credentials or secrets involved? + +Guard can block execution entirely. When it does, the pipeline stops. + +Verifier checks the executor's output against the original goal. Even approved executions can fail. Verifier's job is to detect that failure and report it. + +## Adapter Layer + +Each backend (shell, OpenClaw, Hermes, Claude Code) is behind a uniform adapter interface. 
This makes it possible to: + +- Swap the shell executor for a Python executor +- Route to Claude Code for specific tasks +- Keep Hermes as a reviewer without making it a primary executor + +Current adapters: + +``` +adapters/ +├── shell_adapter.md # bash / python3 / node execution +├── openclaw_adapter.md # OpenClaw agent dispatch +├── hermes_adapter.md # Hermes advisory review +└── claude_code_adapter.md # Claude Code session dispatch +``` + +## Memory Layer + +Memory is structured as templates, not raw logs. Each task produces a structured record: + +``` +memory/ +├── templates/ +│ ├── daily.md # Per-task session record +│ ├── decision.md # Guard decision with reasoning +│ ├── failure.md # Failed execution with diagnosis +│ └── lesson.md # Cross-task learnings +├── project/ +│ └── status.md # Current project state summary +└── lessons/ + └── guard-before-executor.md # Safety lessons learned +``` + +The memory layer is **read by Scout** at startup so future tasks have context. It is **written by Memory Manager** at task end. + +## Workspace Boundary + +The executor may only write to: + +``` +.multi-agent/workspace/ +``` + +Writes to paths outside this boundary are denied by Guard by default. The shell adapter checks all write paths before executing. diff --git a/docs/mimo-orbit.md b/docs/mimo-orbit.md new file mode 100644 index 0000000..9853cf9 --- /dev/null +++ b/docs/mimo-orbit.md @@ -0,0 +1,84 @@ +# MiMo Orbit Integration + +## Why MiMo-V2.5 + +MiMo-V2.5's extended context windows and long-context reasoning capability are relevant to several bottlenecks in the current skeleton: + +### Long-Context Planning + +The Analyst agent currently generates plans from a fixed-context prompt (scout findings + memory). For complex tasks with many sub-steps, the plan quality degrades as the context grows. MiMo-V2.5's extended context could maintain plan coherence across dozens of sub-steps without truncation. 
+ +### Hermes as Fallback Reviewer + +Hermes currently works as a rule-based reviewer. With MiMo-V2.5, the reviewer could: + +- Understand nuanced safety trade-offs in execution requests +- Detect when an approved-looking command has a hidden risky side effect +- Provide natural-language reasoning for why a request was flagged + +### Verifier Reasoning + +The Verifier currently checks output against a simple diff or exit-code check. MiMo-V2.5's reasoning could: + +- Detect partial success (e.g., 9 of 10 files created) +- Identify semantic mismatches between goal and output +- Provide a natural-language diagnosis when verification fails + +### Memory Summarization + +The memory layer currently stores raw structured records. Over time, these accumulate. MiMo-V2.5 could: + +- Summarize a week's worth of task records into a compact lesson +- Extract reusable patterns from decision logs +- Prioritize which lessons are most relevant for a new task + +## Current State vs. Future State + +| Component | Current | Future with MiMo-V2.5 | +|---|---|---| +| **Router** | Keyword matching | Semantic task classification | +| **Scout** | File search + grep | Deep codebase analysis | +| **Analyst** | Fixed-context planning | Long-context multi-step planning | +| **Hermes** | Rule-based review | Reasoning-based review | +| **Guard** | Pattern deny-list | Contextual risk assessment | +| **Verifier** | Exit code + diff | Semantic output verification | +| **Memory** | Raw templates | Summarized + retrievable lessons | + +## Planned Integration Points + +### 1. MiMo as Analyst Backend + +Replace the current fixed-prompt Analyst with a MiMo-V2.5 call. Input: scout findings + memory context. Output: structured plan with sub-steps. + +### 2. MiMo as Hermes Reviewer + +Route complex tasks (as detected by Router) to MiMo for a second review pass before Guard. MiMo reviews the Analyst's plan and provides a confidence score + reasoning. + +### 3. 
MiMo as Verifier + +After Executor completes, route output to MiMo for semantic verification. MiMo checks: did the execution actually satisfy the goal? Provide a natural-language verdict. + +### 4. MiMo for Memory Management + +Periodically run MiMo over accumulated lesson memory to produce summarized updates. Keep only the most relevant lessons in active context. + +## Not Claiming Current Integration + +**Important:** The skeleton currently does NOT have MiMo-V2.5 integration. The adapter layer (`adapters/`) supports shell, OpenClaw, Hermes, and Claude Code. MiMo is a planned integration target, not a current feature. + +Do not claim in the README or docs that "MiMo is integrated" unless the adapter and runtime call actually exist in the codebase. + +## How to Add MiMo Integration + +When the integration is ready, the steps would be: + +1. Create `adapters/mimo_adapter.md` following the existing adapter pattern +2. Add `mimo` to the `supported_backends` list in `config.yaml` +3. Create `scripts/call_mimo.sh` (new file) for subprocess invocation +4. Update the Coordinator to route `analyst` / `hermes` / `verifier` calls to the MiMo adapter +5. Add MiMo API key to local `.env` (never committed) +6. Update this document and the README to reflect the integration + +## Eligibility Note + +This skeleton is being submitted for the **MiMo Orbit** program to support the development of the planned MiMo-V2.5 integration described above. The integration does not exist yet — this application is for the capability development grant. diff --git a/docs/security.md b/docs/security.md new file mode 100644 index 0000000..7afe0f9 --- /dev/null +++ b/docs/security.md @@ -0,0 +1,105 @@ +# Security Model + +## Threat Model + +This system runs entirely on a single local machine. 
The threat model assumes: + +- The operator is the only human with access to the machine +- Agents execute local commands only (shell, Python, Claude Code) +- No network access beyond what individual tools allow +- No multi-user isolation is implemented — all agents run under the same OS user + +The system's job is not to protect against a malicious operator, but to prevent **accidental destructive actions** from escalating — a mis-typed command, a wrong path, or a runaway script. + +## What the System Refuses + +Guard evaluates every execution request against a deny-list. The following are always blocked: + +| Pattern | Example | +|---|---| +| `sudo` | `sudo apt install anything` | +| Recursive force remove | `rm -rf /` or `rm -rf /home/*` | +| Recursive chmod/chown | `chmod -R 777 /` | +| System directories | `/etc`, `/usr`, `/var`, `/opt`, `/root` | +| Credential access | Commands that read `~/.ssh/`, `~/.aws/`, `~/.netrc` | +| Attempt to escape workspace | Any write outside `.multi-agent/workspace/` | + +Guard returns `DENIED` for these cases. The executor never runs. + +## Workspace-Only Write Policy + +The executor may only write to: + +``` +.multi-agent/workspace/ +``` + +Write operations that target paths outside this boundary are blocked by the shell adapter **before** Guard is even consulted. This is enforced as a path check in the adapter layer. + +## Forbidden Commands + +These commands are blocked at the shell adapter level, regardless of Guard's decision: + +- `curl` or `wget` that write to system paths +- `tee` targeting `/etc`, `/usr`, `/var` +- `git` commands that push or commit to non-local remotes +- `ssh` with user@host targets +- `chmod +x` on paths outside workspace + +## Credential Handling + +The system **never logs credentials**. 
Specifically: + +- No API keys, tokens, passwords, or secrets are written to memory templates +- The `logs/` directory is excluded from version control +- Shell commands that would echo or export credentials are flagged by Guard +- Claude Code adapter is configured to never log raw `ANTHROPIC_API_KEY` or `OPENAI_API_KEY` values + +## Public Repository Hygiene + +Before pushing to a public branch, run: + +```bash +# Check for accidental secrets +grep -rInE 'api[_-]?key|token|secret|password|credential|BEGIN PRIVATE|sk-' --exclude-dir=.git --exclude="*.md" . + +# Ensure runtime artifacts are not staged +git status --short | grep -E 'logs|memory/daily|memory/failures|memory/decisions|workspace/.*\.txt' +``` + +If either command produces output, investigate before pushing. + +The `.gitignore` in this repository blocks all common secret patterns. If you add new tools or adapters, extend the ignore list accordingly. + +## How to Safely Run Local Tests + +```bash +# 1. Always run from the project root +cd /path/to/multi-agent-lab + +# 2. Run the smoke test first — no side effects +bash tests/smoke_public.sh + +# 3. Run a workflow with a sandboxed goal +bash .multi-agent/scripts/run_workflow.sh \ + --type info \ + --goal "list all markdown files in this project" + +# 4. Inspect what was written before committing +git status --short + +# 5. 
If logs/ or memory/daily/ appeared, clean them up +rm -rf .multi-agent/logs .multi-agent/memory/daily/* +git status --short # confirm that no runtime artifacts remain +``` + +## Safe Contributor Checklist + +Before opening a PR: + +- [ ] `bash tests/smoke_public.sh` passes +- [ ] No `logs/`, `memory/daily/`, `memory/failures/`, `memory/decisions/` in `git status` +- [ ] No `.env`, `.key`, `.pem`, `*.token`, `credentials*` files staged +- [ ] All new scripts pass `bash -n` +- [ ] All new Python files pass `python3 -m py_compile` +- [ ] README/docs reflect what was actually implemented (not aspirational claims) diff --git a/docs/workflows.md b/docs/workflows.md new file mode 100644 index 0000000..906f03c --- /dev/null +++ b/docs/workflows.md @@ -0,0 +1,120 @@ +# Workflow Reference + +Each workflow type defines which agents run and where the pipeline stops. + +--- + +## info + +**Trigger:** Task is read-only (search, inspect, read, analyze without modification). + +**Pipeline:** +``` +Router → Scout → Analyst → MemoryManager +``` +No execution. No files written. No changes made. + +**Use when:** You want to understand a codebase, inspect memory, or review file contents without touching anything. + +**Stop conditions:** None. Always completes with a Scout + Analyst report. + +--- + +## analysis + +**Trigger:** Task requires multi-step reasoning and comparison (not just inspection). + +**Pipeline:** +``` +Router → Scout → Analyst → Hermes Reviewer → MemoryManager +``` +Executor never runs. Analyst produces a plan, Hermes reviews it. + +**Use when:** Evaluating trade-offs, comparing approaches, planning a refactor, reviewing a design. + +**Stop conditions:** Stops after Analyst + Hermes review. No side effects. + +--- + +## modify + +**Trigger:** Task creates or updates files inside `.multi-agent/workspace/`. + +**Pipeline:** +``` +Router → Scout → Analyst → Guard → Executor → Verifier → MemoryManager +``` +Full pipeline. Guard must approve. Verifier checks result. 
+ +**Use when:** Creating a new file, editing an existing file, generating code. + +**Stop conditions:** +- Guard DENIED: blocked before executor +- Verifier FAILED: execution result does not match goal + +--- + +## debug + +**Trigger:** Diagnosing a failure, running diagnostics, or investigating runtime behavior. + +**Pipeline:** +``` +Router → Scout → Analyst → Guard → Executor → Verifier → MemoryManager +``` +Same as modify, but with extra Scout inspection and workspace state capture. + +**Use when:** Running test scripts, inspecting logs, checking exit codes. + +**Stop conditions:** +- Guard DENIED on risky commands +- Verifier detects unexpected output + +--- + +## architecture + +**Trigger:** Design review, dry-run, or documentation of agent roles. + +**Pipeline:** +``` +Router → Scout → Analyst → Hermes Reviewer → MemoryManager +``` +No executor. No side effects. A pure review pipeline. + +**Use when:** Reviewing system design, planning new agent roles, auditing the workflow itself. + +**Stop conditions:** None. Always ends with a review document. + +--- + +## risky + +**Trigger:** Task touches system paths, uses sudo, or modifies outside `.multi-agent/workspace/`. + +**Pipeline:** +``` +Router → Scout → Analyst → Guard → [BLOCKED] → MemoryManager +``` +Guard blocks by default for risky tasks. Executor is never reached unless Guard has an explicit allow rule. + +**Use when:** Cleaning logs, installing packages, modifying system config. + +**Default Guard behavior:** DENIED. + +**Stop conditions:** Always blocked at Guard unless an explicit override rule is configured (not recommended for public use). 
+ +--- + +## Workflow Type Detection + +Router classifies based on keywords in the goal: + +| Keyword | Workflow | +|---|---| +| `inspect`, `list`, `find`, `read`, `search` | `info` | +| `analyze`, `compare`, `evaluate`, `review` | `analysis` | +| `create`, `write`, `edit`, `modify`, `generate` | `modify` | +| `debug`, `diagnose`, `check`, `test` | `debug` | +| `design`, `plan`, `dry-run`, `architecture` | `architecture` | +| `sudo`, `rm -rf`, `chmod -R`, `system`, `/etc`, `/usr` | `risky` | diff --git a/examples/architecture-review.md b/examples/architecture-review.md new file mode 100644 index 0000000..ba5e249 --- /dev/null +++ b/examples/architecture-review.md @@ -0,0 +1,143 @@ +# Architecture Review Example + +This example demonstrates an `architecture` workflow — a dry-run review of the agent system design with no execution. + +## Goal + +> "Review the current agent role definitions and summarize the Guard-verifier safety pattern" + +## Stage-by-Stage Execution + +### 1. Router.classify() + +**Input:** `"Review the current agent role definitions and summarize the Guard-verifier safety pattern"` +**Output:** `workflow_type = architecture` +**Reasoning:** Keywords "review" and "summarize" indicate a design analysis task. + +### 2. 
Scout.inspect() + +**Actions:** +- Reads all agent definition files in `.multi-agent/agents/` +- Reads `docs/architecture.md` for system overview +- Reads `.multi-agent/memory/lessons/` for past architecture decisions + +**Findings:** +``` +Agent files (9 total): +- main.md : Coordinator, final authority +- router.md : Workflow classification +- scout.md : Read-only inspection +- analyst.md : Plan generation +- hermes_reviewer.md : Fallback complex-task reviewer +- guard.md : Safety gate, deny-list checking +- executor.md : Shell/Python/Claude Code backend +- verifier.md : Post-execution result checking +- memory_manager.md : Structured memory read/write + +Architecture doc: exists, up to date +Lessons: 1 file (guard-before-executor.md) +``` + +### 3. Analyst.plan() + +**Input:** Scout findings + original goal +**Output:** +```yaml +steps: + - id: "1" + kind: "inspect" + task: "Read guard.md and verifier.md in full" + - id: "2" + kind: "analyze" + task: "Summarize the Guard-verifier safety pattern design" + - id: "3" + kind: "analyze" + task: "Identify any gaps or missing safety considerations" +``` + +### 4. Hermes.review() + +**Input:** Analyst plan for a complex multi-step review +**Output:** +```markdown +## Hermes Review Notes + +The Guard-verifier pattern provides two-layer safety: +1. Guard prevents execution of risky operations +2. Verifier checks output even for approved operations + +This is a defense-in-depth approach. The separation is sound. + +One consideration: the current deny-list is pattern-based. +Future MiMo-V2.5 integration could enable semantic risk assessment. + +Confidence: HIGH for current implementation. +``` + +### 5. Guard.evaluate() + +**Input:** +```json +{ + "action": "architecture_review", + "note": "This is a read-only analysis task" +} +``` + +**Output:** `APPROVED` +**Reasoning:** No execution requested. Read-only review. + +### 6. Executor.run() + +**Does NOT run** — architecture workflows never execute. + +### 7. 
Verifier.check() + +**Does NOT run** — Verifier only runs after Executor. + +### 8. MemoryManager.write() + +**Action:** Writes `daily.md` and `decision.md` for this architecture review. + +--- + +## Expected Output + +The workflow produces an architecture review document: + +```markdown +# Architecture Review — 2026-04-29 + +## Goal +Review the current agent role definitions and summarize the Guard-verifier safety pattern + +## Workflow +architecture + +## Agents Used +Router → Scout → Analyst → Hermes → Guard → MemoryManager + +## Executor +Did not run (architecture workflow) + +## Key Findings +- 9 agent roles defined, all with documented responsibilities +- Guard-verifier provides two-layer safety: block at gate + verify after run +- Pattern-based deny-list is current implementation; semantic review planned + +## Hermes Notes +The separation of Guard and Verifier is sound. +Defense-in-depth approach confirmed. + +## Result +SUCCESS — Review complete. No execution. +``` + +--- + +## Safety Notes + +- No commands were executed +- No files were created or modified +- This is a pure design review pipeline +- Suitable for auditing the system itself diff --git a/examples/info-workflow.md b/examples/info-workflow.md new file mode 100644 index 0000000..af02bb6 --- /dev/null +++ b/examples/info-workflow.md @@ -0,0 +1,96 @@ +# Info Workflow Example + +This example demonstrates a read-only `info` workflow. No files are created or modified. + +## Goal + +> "List all agent role definition files in the project" + +## Stage-by-Stage Execution + +### 1. Router.classify() + +**Input:** `"list all agent role definition files in the project"` +**Output:** `workflow_type = info` +**Reasoning:** Keyword "list" indicates a read-only inspection task. + +### 2. 
Scout.inspect() + +**Actions:** +- Reads `.multi-agent/agents/` directory contents +- Reads `.multi-agent/memory/project/status.md` for recent context +- No filesystem modification + +**Findings:** +``` +Agent files found: +- main.md : Main coordinator, task routing +- router.md : Task type classification +- scout.md : Read-only inspection +- analyst.md : Plan generation +- hermes_reviewer.md : Fallback review +- guard.md : Safety gate +- executor.md : Backend execution +- verifier.md : Result verification +- memory_manager.md : Memory read/write +``` + +### 3. Analyst.plan() + +**Input:** Scout findings + original goal +**Output:** +```yaml +steps: + - id: "1" + kind: "inspect" + task: "Confirm all agent files are present and non-empty" + - id: "2" + kind: "analyze" + task: "Summarize each agent's documented responsibility" +``` + +### 4. MemoryManager.write() + +**Action:** Writes `daily.md` entry summarizing the session. + +**Result:** +``` +Status: SUCCESS +Workflow: info +Output: Listed 9 agent files, all non-empty +Steps run: Scout → Analyst → MemoryManager +Executor ran: NO +``` + +## Expected Output + +The workflow produces a structured report: + +```markdown +# info session — 2026-04-29 + +## Goal +List all agent role definition files in the project + +## Workflow +info + +## Findings +[Scout report listing agent files] + +## Analysis +[Analyst summary of each role] + +## Executor +Did not run (info workflow) + +## Result +SUCCESS +``` + +## Notes + +- No files were created or modified +- No shell commands were executed +- No credentials or secrets were accessed +- This is a dry-run document, not a runtime log diff --git a/examples/modify-workflow.md b/examples/modify-workflow.md new file mode 100644 index 0000000..07607a8 --- /dev/null +++ b/examples/modify-workflow.md @@ -0,0 +1,131 @@ +# Modify Workflow Example + +This example demonstrates a `modify` workflow that safely creates a file inside the workspace boundary.
+ +## Goal + +> "Create a file `demo.txt` in the workspace directory with the content: `hello agent`" + +## Stage-by-Stage Execution + +### 1. Router.classify() + +**Input:** `"Create a file demo.txt in the workspace directory with the content: hello agent"` +**Output:** `workflow_type = modify` +**Reasoning:** Keyword "create" indicates a file modification task. + +### 2. Scout.inspect() + +**Actions:** +- Confirms `.multi-agent/workspace/` directory exists and is writable +- No existing `demo.txt` (first creation) + +**Findings:** +``` +workspace/ + - exists: true + - writable: true + - demo.txt exists: false +``` + +### 3. Analyst.plan() + +**Input:** Scout findings + original goal +**Output:** +```yaml +steps: + - id: "1" + kind: "action" + task: "Write 'hello agent' to .multi-agent/workspace/demo.txt" +``` + +### 4. Guard.evaluate() + +**Input:** +```json +{ + "action": "write_file", + "path": ".multi-agent/workspace/demo.txt", + "content": "hello agent" +} +``` + +**Output:** `APPROVED` +**Reasoning:** +- Write target is inside workspace boundary +- Content is plain text, no credential access +- No system paths involved + +### 5. Executor.run() + +**Command executed:** +```bash +echo "hello agent" > .multi-agent/workspace/demo.txt +``` + +**Output:** +``` +File created: .multi-agent/workspace/demo.txt +Bytes written: 12 +``` + +### 6. Verifier.check() + +**Input:** +```json +{ + "goal": "Create demo.txt with content 'hello agent'", + "executor_output": "File created: .multi-agent/workspace/demo.txt\nBytes written: 12" +} +``` + +**Actions:** +- Reads `.multi-agent/workspace/demo.txt` +- Compares content to expected value + +**Output:** `VERIFIED` +**Reasoning:** File exists and contains exactly `hello agent`. + +### 7. MemoryManager.write() + +**Action:** Writes `daily.md` entry and `decision.md` for the Guard pass. 
+ +--- + +## Expected Output + +After running: +```bash +bash .multi-agent/scripts/run_workflow.sh \ + --type modify \ + --goal "Create demo.txt in workspace with content hello agent" +``` + +**Workspace result:** +```bash +$ cat .multi-agent/workspace/demo.txt +hello agent +``` + +**Memory record (decision.md):** +```markdown +# Guard Decision — 2026-04-29 + +## Action +write_file: .multi-agent/workspace/demo.txt + +## Decision +APPROVED + +## Reasoning +Inside workspace boundary. Plain text content. No credential access. +``` + +--- + +## Safety Notes + +- The write is constrained to `.multi-agent/workspace/` +- Guard would block any write to `~/.ssh/`, `/etc/`, or system paths +- The file content is verified by Verifier after creation +- No secrets or tokens were involved diff --git a/tests/smoke_public.sh b/tests/smoke_public.sh new file mode 100755 index 0000000..7db84d1 --- /dev/null +++ b/tests/smoke_public.sh @@ -0,0 +1,126 @@ +#!/usr/bin/env bash +# Public smoke test — checks file structure and script syntax +# No runtime dependencies (openclaw/hermes/claude not required) +set -euo pipefail + +echo "=== Public Smoke Test ===" +echo + +# Check root files +echo "[1] Checking root files..." +for f in README.md LICENSE CONTRIBUTING.md ROADMAP.md .gitignore; do + if [[ -f "$f" ]]; then + echo " OK: $f" + else + echo " MISSING: $f" + exit 1 + fi +done + +# Check core skeleton files +echo +echo "[2] Checking .multi-agent core files..." 
+for f in \ + .multi-agent/README.md \ + .multi-agent/config.yaml \ + .multi-agent/agents/main.md \ + .multi-agent/agents/router.md \ + .multi-agent/agents/scout.md \ + .multi-agent/agents/analyst.md \ + .multi-agent/agents/guard.md \ + .multi-agent/agents/executor.md \ + .multi-agent/agents/verifier.md \ + .multi-agent/workflows/info.yaml \ + .multi-agent/workflows/modify.yaml \ + .multi-agent/scripts/run_workflow.sh \ + .multi-agent/scripts/verify.py; do + if [[ -f "$f" ]]; then + echo " OK: $f" + else + echo " MISSING: $f" + exit 1 + fi +done + +# Check docs +echo +echo "[3] Checking docs..." +for f in docs/architecture.md docs/workflows.md docs/security.md; do + if [[ -f "$f" ]]; then + echo " OK: $f" + else + echo " MISSING: $f" + exit 1 + fi +done + +# Syntax check bash scripts +echo +echo "[4] Checking bash script syntax..." +shopt -s nullglob +for f in .multi-agent/scripts/*.sh tests/*.sh; do + if bash -n "$f" 2>/dev/null; then + echo " OK: $f" + else + echo " SYNTAX ERROR: $f" + exit 1 + fi +done +shopt -u nullglob + +# Syntax check python scripts +echo +echo "[5] Checking Python script syntax..." +shopt -s nullglob +for f in .multi-agent/scripts/*.py tests/*.py; do + if python3 -m py_compile "$f" 2>/dev/null; then + echo " OK: $f" + else + echo " SYNTAX ERROR: $f" + exit 1 + fi +done +shopt -u nullglob + +# Check forbidden directories are not tracked by git (runtime artifacts only) +echo +echo "[6] Checking forbidden directories not tracked by git..." 
+# These directories should exist (runtime) but must not be git-tracked +for dir in \ + .multi-agent/logs \ + .multi-agent/memory/daily \ + .multi-agent/memory/failures \ + .multi-agent/memory/decisions; do + tracked=$(git ls-files --error-unmatch "$dir" 2>/dev/null && echo YES || echo NO) + if [[ "$tracked" == "YES" ]]; then + echo " TRACKED (should not be): $dir" + exit 1 + else + echo " OK (not tracked): $dir" + fi +done + +# Check gitignore covers critical items +echo +echo "[7] Checking .gitignore coverage..." +required_patterns=( + ".multi-agent/logs/" + ".multi-agent/memory/daily/" + ".multi-agent/memory/failures/" + ".multi-agent/memory/decisions/" + ".multi-agent/workspace/" + ".env" + ".ssh/" + ".openclaw/" +) +for pat in "${required_patterns[@]}"; do + if grep -q "$pat" .gitignore 2>/dev/null; then + echo " OK: $pat in .gitignore" + else + echo " MISSING in .gitignore: $pat" + exit 1 + fi +done + +echo +echo "=== public smoke test passed ===" From 30d91c9d4d68a4a03610d3b86cc661d8a5929cac Mon Sep 17 00:00:00 2001 From: le Date: Wed, 29 Apr 2026 13:49:46 +0800 Subject: [PATCH 2/6] polish public showcase: CI badge, mermaid diagram, SECURITY.md, enhanced smoke test --- README.md | 48 ++++++++++++++++++------- SECURITY.md | 71 +++++++++++++++++++++++++++++++++++++ docs/mimo-orbit.md | 22 ++++++------ tests/smoke_public.sh | 81 +++++++++++++++++++++++++++++++++++++------ 4 files changed, 189 insertions(+), 33 deletions(-) create mode 100644 SECURITY.md diff --git a/README.md b/README.md index c6aba1a..79e2354 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Multi Agent Lab +[![Public Smoke Test](https://github.com/qian-le/multi-agent-lab/actions/workflows/public-smoke.yml/badge.svg)](https://github.com/qian-le/multi-agent-lab/actions/workflows/public-smoke.yml) + A local multi-agent workflow skeleton built around OpenClaw, Hermes, and Claude Code. 
## Project Scope @@ -22,6 +24,24 @@ It is designed as a local development and experimentation environment, not a hos ## System Architecture +```mermaid +flowchart TD + U[User Goal] --> M[Main Coordinator] + M --> R[Router
classify task type] + R --> S[Scout
read-only inspection] + S --> A[Analyst
planning] + A --> H[Hermes Reviewer
complex tasks only] + H --> G[Guard
safety gate] + A --> G + G -->|allow| E[Executor
shell / python / claude_code] + G -->|deny| STOP[Pipeline Stopped] + E --> V[Verifier
check result] + V -->|pass| MEM[Memory Manager
record to templates] + V -->|fail| FAIL[Reported as Failure] +``` + +Text overview: + ``` User Goal └─> Main Coordinator @@ -30,7 +50,7 @@ User Goal ├─> Analyst (plan) ├─> Hermes Reviewer (complex tasks) ├─> Guard (safety gate) - ├─> Executor (run) + ├─> Executor (run) [only if Guard approves] ├─> Verifier (check result) └─> Memory Manager (record) ``` @@ -80,10 +100,11 @@ multi-agent-lab/ │ └── workspace/ # Sandbox for executor writes ├── docs/ # Architecture, security, workflow guides ├── examples/ # Example runs (read-only, no real logs) -├── tests/ # Public smoke test -├── README.md # This file -├── LICENSE # MIT +├── tests/ # Public smoke test +├── README.md # This file +├── LICENSE # MIT ├── CONTRIBUTING.md +├── SECURITY.md └── ROADMAP.md ``` @@ -105,15 +126,17 @@ This is a **workflow skeleton**, not a deployed product. It is useful for studyi ## MiMo Orbit Relevance -MiMo-V2.5's long-context reasoning is relevant to this skeleton in several planned areas: +This project is designed to integrate **MiMo-V2.5-Pro** as the long-context reasoning engine for several key agent roles. The planned integration targets are: -- **Analyst planning** — multi-step plans with tool call reasoning -- **Hermes review** — fallback review for ambiguous or high-stakes tasks -- **Verifier reasoning** — checking whether execution output satisfies the original goal -- **Memory summarization** — condensing long session history into reusable lessons +- **Analyst planning** — multi-step plans with tool call reasoning over long task histories +- **Hermes review** — fallback reasoning review for ambiguous or high-stakes tasks +- **Verifier reasoning** — semantic output verification beyond diff and exit-code checks +- **Memory summarization** — condensing accumulated session records into reusable lessons - **Workflow optimization** — learning from past Guard decisions to route more efficiently -The skeleton currently uses rule-based routing and heuristic Guard checks. 
MiMo-V2.5 integration is a future enhancement target, not a current implementation. +**Current state:** The skeleton uses rule-based routing and heuristic Guard checks. MiMo-V2.5-Pro integration is a **planned development target**, not a current implementation. The adapter layer is designed to accommodate this integration when the API is available. + +See [docs/mimo-orbit.md](docs/mimo-orbit.md) for the full integration plan. ## Roadmap @@ -129,10 +152,11 @@ The skeleton currently uses rule-based routing and heuristic Guard checks. MiMo- - Add richer memory retrieval (semantic vs keyword) ### Long-term -- MiMo-V2.5 based long-context planner for Analyst +- **MiMo-V2.5-Pro based Analyst** — long-context planner +- **MiMo-V2.5-Pro based Verifier** — semantic output verification +- **MiMo-V2.5-Pro based Hermes Reviewer** — contextual risk reasoning - Multi-model routing based on task complexity - Task graph execution with dependency tracking -- Self-evaluation loop for verifier - Optional web dashboard for workflow monitoring ## License diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..e50b28a --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,71 @@ +# Security Policy + +## Supported Versions + +Only the latest published release is supported with security updates. + +| Version | Supported | +|---|---| +| latest release | ✅ | +| older releases | ❌ | + +## Reporting a Vulnerability + +If you discover a security issue, please report it responsibly: + +1. **Do not** open a public GitHub Issue for security vulnerabilities +2. Send a private report instead +3. Allow 48 hours for initial response + +## Threat Model + +This system runs on a single local machine under a single OS user account. It is designed to prevent **accidental destructive actions** from causing harm. It does not provide isolation between multiple users or machines. 
+ +## What This System Protects Against + +- Accidental deletion of project files via `rm -rf` and similar commands +- Writing to system directories (`/etc`, `/usr`, `/var`, `/opt`) +- Credential exposure through log files +- Unintended modification of files outside `.multi-agent/workspace/` + +## What This System Does NOT Protect Against + +- Malicious operator with local shell access +- Remote code execution attacks +- Privilege escalation +- Multi-user isolation + +## Safety Features + +### Guard Before Executor + +Every execution request passes through Guard first. Guard blocks: + +- `sudo`, `su`, `doas` +- Recursive force remove (`rm -rf /`, `rm -rf /home/*`) +- Recursive chmod/chown on system paths +- Writes to `/etc`, `/usr`, `/var`, `/opt`, `/root` +- Commands that access `~/.ssh/`, `~/.aws/`, `~/.netrc` + +### Workspace-Only Writes + +The Executor is constrained to `.multi-agent/workspace/`. Any attempt to write outside this boundary is blocked at the adapter layer, before Guard is even consulted. + +### No Secret Logging + +API keys, tokens, passwords, and credentials are never written to: + +- Memory templates +- Log files +- stdout / stderr + +## Reporting Guidelines + +- Include a clear description of the issue +- Describe the expected vs. actual behavior +- Include steps to reproduce (if applicable) +- Do not include actual secrets or credentials in reports + +## Security Updates + +Security fixes are applied immediately to the main branch and released as a patch version bump. diff --git a/docs/mimo-orbit.md b/docs/mimo-orbit.md index 9853cf9..7b31065 100644 --- a/docs/mimo-orbit.md +++ b/docs/mimo-orbit.md @@ -1,16 +1,18 @@ # MiMo Orbit Integration -## Why MiMo-V2.5 +> **Status: Planned** — This document describes a future integration target. MiMo-V2.5-Pro is NOT currently integrated into this skeleton. 
-MiMo-V2.5's extended context windows and long-context reasoning capability are relevant to several bottlenecks in the current skeleton: +## Why MiMo-V2.5-Pro + +MiMo-V2.5-Pros extended context windows and long-context reasoning capability are relevant to several bottlenecks in the current skeleton: ### Long-Context Planning -The Analyst agent currently generates plans from a fixed-context prompt (scout findings + memory). For complex tasks with many sub-steps, the plan quality degrades as the context grows. MiMo-V2.5's extended context could maintain plan coherence across dozens of sub-steps without truncation. +The Analyst agent currently generates plans from a fixed-context prompt (scout findings + memory). For complex tasks with many sub-steps, the plan quality degrades as the context grows. MiMo-V2.5-Pro's extended context could maintain plan coherence across dozens of sub-steps without truncation. ### Hermes as Fallback Reviewer -Hermes currently works as a rule-based reviewer. With MiMo-V2.5, the reviewer could: +Hermes currently works as a rule-based reviewer. With MiMo-V2.5-Pro the reviewer could: - Understand nuanced safety trade-offs in execution requests - Detect when an approved-looking command has a hidden risky side effect @@ -18,7 +20,7 @@ Hermes currently works as a rule-based reviewer. With MiMo-V2.5, the reviewer co ### Verifier Reasoning -The Verifier currently checks output against a simple diff or exit-code check. MiMo-V2.5's reasoning could: +The Verifier currently checks output against a simple diff or exit-code check. MiMo-V2.5-Pros reasoning could: - Detect partial success (e.g., 9 of 10 files created) - Identify semantic mismatches between goal and output @@ -26,7 +28,7 @@ The Verifier currently checks output against a simple diff or exit-code check. M ### Memory Summarization -The memory layer currently stores raw structured records. Over time, these accumulate. MiMo-V2.5 could: +The memory layer currently stores raw structured records. 
Over time, these accumulate. MiMo-V2.5-Procould: - Summarize a week's worth of task records into a compact lesson - Extract reusable patterns from decision logs @@ -34,7 +36,7 @@ The memory layer currently stores raw structured records. Over time, these accum ## Current State vs. Future State -| Component | Current | Future with MiMo-V2.5 | +| Component | Current | Future with MiMo-V2.5-Pro| |---|---|---| | **Router** | Keyword matching | Semantic task classification | | **Scout** | File search + grep | Deep codebase analysis | @@ -48,7 +50,7 @@ The memory layer currently stores raw structured records. Over time, these accum ### 1. MiMo as Analyst Backend -Replace the current fixed-prompt Analyst with a MiMo-V2.5 call. Input: scout findings + memory context. Output: structured plan with sub-steps. +Replace the current fixed-prompt Analyst with a MiMo-V2.5-Procall. Input: scout findings + memory context. Output: structured plan with sub-steps. ### 2. MiMo as Hermes Reviewer @@ -64,7 +66,7 @@ Periodically run MiMo over accumulated lesson memory to produce summarized updat ## Not Claiming Current Integration -**Important:** The skeleton currently does NOT have MiMo-V2.5 integration. The adapter layer (`adapters/`) supports shell, OpenClaw, Hermes, and Claude Code. MiMo is a planned integration target, not a current feature. +**Important:** The skeleton currently does NOT have MiMo-V2.5-Prointegration. The adapter layer (`adapters/`) supports shell, OpenClaw, Hermes, and Claude Code. MiMo is a planned integration target, not a current feature. Do not claim in the README or docs that "MiMo is integrated" unless the adapter and runtime call actually exist in the codebase. @@ -81,4 +83,4 @@ When the integration is ready, the steps would be: ## Eligibility Note -This skeleton is being submitted for the **MiMo Orbit** program to support the development of the planned MiMo-V2.5 integration described above. 
The integration does not exist yet — this application is for the capability development grant. +This skeleton is being submitted for the **MiMo Orbit** program to support the development of the planned MiMo-V2.5-Prointegration described above. The integration does not exist yet — this application is for the capability development grant. diff --git a/tests/smoke_public.sh b/tests/smoke_public.sh index 7db84d1..f1f3220 100755 --- a/tests/smoke_public.sh +++ b/tests/smoke_public.sh @@ -8,7 +8,7 @@ echo # Check root files echo "[1] Checking root files..." -for f in README.md LICENSE CONTRIBUTING.md ROADMAP.md .gitignore; do +for f in README.md LICENSE CONTRIBUTING.md ROADMAP.md SECURITY.md .gitignore; do if [[ -f "$f" ]]; then echo " OK: $f" else @@ -30,10 +30,35 @@ for f in \ .multi-agent/agents/guard.md \ .multi-agent/agents/executor.md \ .multi-agent/agents/verifier.md \ + .multi-agent/agents/memory_manager.md \ .multi-agent/workflows/info.yaml \ .multi-agent/workflows/modify.yaml \ + .multi-agent/workflows/analysis.yaml \ + .multi-agent/workflows/debug.yaml \ + .multi-agent/workflows/architecture.yaml \ + .multi-agent/workflows/risky.yaml \ .multi-agent/scripts/run_workflow.sh \ - .multi-agent/scripts/verify.py; do + .multi-agent/scripts/verify.py \ + .multi-agent/scripts/guard_check.py \ + .multi-agent/adapters/openclaw_adapter.md \ + .multi-agent/adapters/hermes_adapter.md \ + .multi-agent/adapters/claude_code_adapter.md \ + .multi-agent/memory/templates/daily.md; do + if [[ -f "$f" ]]; then + echo " OK: $f" + else + echo " MISSING: $f" + exit 1 + fi +done + +# Check examples +echo +echo "[3] Checking examples..." +for f in \ + examples/info-workflow.md \ + examples/modify-workflow.md \ + examples/architecture-review.md; do if [[ -f "$f" ]]; then echo " OK: $f" else @@ -44,8 +69,12 @@ done # Check docs echo -echo "[3] Checking docs..." -for f in docs/architecture.md docs/workflows.md docs/security.md; do +echo "[4] Checking docs..." 
+for f in \ + docs/architecture.md \ + docs/workflows.md \ + docs/security.md \ + docs/mimo-orbit.md; do if [[ -f "$f" ]]; then echo " OK: $f" else @@ -54,9 +83,19 @@ for f in docs/architecture.md docs/workflows.md docs/security.md; do fi done +# Check CI +echo +echo "[5] Checking GitHub Actions CI..." +if [[ -f ".github/workflows/public-smoke.yml" ]]; then + echo " OK: .github/workflows/public-smoke.yml" +else + echo " MISSING: .github/workflows/public-smoke.yml" + exit 1 +fi + # Syntax check bash scripts echo -echo "[4] Checking bash script syntax..." +echo "[6] Checking bash script syntax..." shopt -s nullglob for f in .multi-agent/scripts/*.sh tests/*.sh; do if bash -n "$f" 2>/dev/null; then @@ -70,7 +109,7 @@ shopt -u nullglob # Syntax check python scripts echo -echo "[5] Checking Python script syntax..." +echo "[7] Checking Python script syntax..." shopt -s nullglob for f in .multi-agent/scripts/*.py tests/*.py; do if python3 -m py_compile "$f" 2>/dev/null; then @@ -82,10 +121,9 @@ for f in .multi-agent/scripts/*.py tests/*.py; do done shopt -u nullglob -# Check forbidden directories are not tracked by git (runtime artifacts only) +# Check forbidden directories not tracked by git echo -echo "[6] Checking forbidden directories not tracked by git..." -# These directories should exist (runtime) but must not be git-tracked +echo "[8] Checking forbidden directories not tracked by git..." for dir in \ .multi-agent/logs \ .multi-agent/memory/daily \ @@ -100,9 +138,9 @@ for dir in \ fi done -# Check gitignore covers critical items +# Check .gitignore covers critical items echo -echo "[7] Checking .gitignore coverage..." +echo "[9] Checking .gitignore coverage..." required_patterns=( ".multi-agent/logs/" ".multi-agent/memory/daily/" @@ -122,5 +160,26 @@ for pat in "${required_patterns[@]}"; do fi done +# Check README has no obviously fake/inflated claims +echo +echo "[10] Checking README honesty..." 
+# Allow "not a production system" (honest disclaimer) but flag inflated claims +if grep -qi "fully functional\|production-ready\|fully integrated\|complete system" README.md 2>/dev/null; then + echo " WARNING: README may contain inflated claims" + exit 1 +else + echo " OK: README is appropriately modest" +fi + +# Check MiMo doc clearly marks integration as planned +echo +echo "[11] Checking MiMo doc status..." +if grep -qi "not.*integrat\|planned\|target\|future\|aspirational" docs/mimo-orbit.md 2>/dev/null; then + echo " OK: MiMo integration is clearly marked as planned" +else + echo " WARNING: MiMo integration status unclear in docs/mimo-orbit.md" + exit 1 +fi + echo echo "=== public smoke test passed ===" From 54d353623a2a6a5efb6809e79ae1a38b8179fe87 Mon Sep 17 00:00:00 2001 From: le Date: Wed, 29 Apr 2026 13:55:20 +0800 Subject: [PATCH 3/6] generalize model integration roadmap --- README.md | 19 +++---- docs/mimo-orbit.md | 86 ------------------------------- docs/model-integration-roadmap.md | 75 +++++++++++++++++++++++++++ tests/smoke_public.sh | 4 +- 4 files changed, 87 insertions(+), 97 deletions(-) delete mode 100644 docs/mimo-orbit.md create mode 100644 docs/model-integration-roadmap.md diff --git a/README.md b/README.md index 79e2354..582d56c 100644 --- a/README.md +++ b/README.md @@ -124,19 +124,20 @@ Most agent demos show a single LLM call. This skeleton shows how multiple agents This is a **workflow skeleton**, not a deployed product. It is useful for studying agent role separation, testing safety boundaries, and running local automation with review steps. -## MiMo Orbit Relevance +## Model Integration Roadmap -This project is designed to integrate **MiMo-V2.5-Pro** as the long-context reasoning engine for several key agent roles. 
The planned integration targets are: +## Model Integration Roadmap -- **Analyst planning** — multi-step plans with tool call reasoning over long task histories -- **Hermes review** — fallback reasoning review for ambiguous or high-stakes tasks -- **Verifier reasoning** — semantic output verification beyond diff and exit-code checks -- **Memory summarization** — condensing accumulated session records into reusable lessons -- **Workflow optimization** — learning from past Guard decisions to route more efficiently +The adapter layer in this skeleton is designed to make backend upgrades straightforward. The natural next step is replacing the rule-based Router, heuristic Guard, and diff-based Verifier with a more capable reasoning model. -**Current state:** The skeleton uses rule-based routing and heuristic Guard checks. MiMo-V2.5-Pro integration is a **planned development target**, not a current implementation. The adapter layer is designed to accommodate this integration when the API is available. +**MiMo-V2.5-Pro** is a strong candidate given its long-context reasoning capability for planning and review tasks. Planned integration targets: -See [docs/mimo-orbit.md](docs/mimo-orbit.md) for the full integration plan. +- **Analyst** — multi-step plans with long task history context +- **Verifier** — semantic output verification beyond diff and exit codes +- **Hermes-style review** — contextual risk reasoning instead of pattern deny-lists +- **Memory** — summarization of accumulated session records + +See [docs/model-integration-roadmap.md](docs/model-integration-roadmap.md) for the full roadmap. ## Roadmap diff --git a/docs/mimo-orbit.md b/docs/mimo-orbit.md deleted file mode 100644 index 7b31065..0000000 --- a/docs/mimo-orbit.md +++ /dev/null @@ -1,86 +0,0 @@ -# MiMo Orbit Integration - -> **Status: Planned** — This document describes a future integration target. MiMo-V2.5-Pro is NOT currently integrated into this skeleton. 
- -## Why MiMo-V2.5-Pro - -MiMo-V2.5-Pros extended context windows and long-context reasoning capability are relevant to several bottlenecks in the current skeleton: - -### Long-Context Planning - -The Analyst agent currently generates plans from a fixed-context prompt (scout findings + memory). For complex tasks with many sub-steps, the plan quality degrades as the context grows. MiMo-V2.5-Pro's extended context could maintain plan coherence across dozens of sub-steps without truncation. - -### Hermes as Fallback Reviewer - -Hermes currently works as a rule-based reviewer. With MiMo-V2.5-Pro the reviewer could: - -- Understand nuanced safety trade-offs in execution requests -- Detect when an approved-looking command has a hidden risky side effect -- Provide natural-language reasoning for why a request was flagged - -### Verifier Reasoning - -The Verifier currently checks output against a simple diff or exit-code check. MiMo-V2.5-Pros reasoning could: - -- Detect partial success (e.g., 9 of 10 files created) -- Identify semantic mismatches between goal and output -- Provide a natural-language diagnosis when verification fails - -### Memory Summarization - -The memory layer currently stores raw structured records. Over time, these accumulate. MiMo-V2.5-Procould: - -- Summarize a week's worth of task records into a compact lesson -- Extract reusable patterns from decision logs -- Prioritize which lessons are most relevant for a new task - -## Current State vs. 
Future State - -| Component | Current | Future with MiMo-V2.5-Pro| -|---|---|---| -| **Router** | Keyword matching | Semantic task classification | -| **Scout** | File search + grep | Deep codebase analysis | -| **Analyst** | Fixed-context planning | Long-context multi-step planning | -| **Hermes** | Rule-based review | Reasoning-based review | -| **Guard** | Pattern deny-list | Contextual risk assessment | -| **Verifier** | Exit code + diff | Semantic output verification | -| **Memory** | Raw templates | Summarized + retrievable lessons | - -## Planned Integration Points - -### 1. MiMo as Analyst Backend - -Replace the current fixed-prompt Analyst with a MiMo-V2.5-Procall. Input: scout findings + memory context. Output: structured plan with sub-steps. - -### 2. MiMo as Hermes Reviewer - -Route complex tasks (as detected by Router) to MiMo for a second review pass before Guard. MiMo reviews the Analyst's plan and provides a confidence score + reasoning. - -### 3. MiMo as Verifier - -After Executor completes, route output to MiMo for semantic verification. MiMo checks: did the execution actually satisfy the goal? Provide a natural-language verdict. - -### 4. MiMo for Memory Management - -Periodically run MiMo over accumulated lesson memory to produce summarized updates. Keep only the most relevant lessons in active context. - -## Not Claiming Current Integration - -**Important:** The skeleton currently does NOT have MiMo-V2.5-Prointegration. The adapter layer (`adapters/`) supports shell, OpenClaw, Hermes, and Claude Code. MiMo is a planned integration target, not a current feature. - -Do not claim in the README or docs that "MiMo is integrated" unless the adapter and runtime call actually exist in the codebase. - -## How to Add MiMo Integration - -When the integration is ready, the steps would be: - -1. Create `adapters/mimo_adapter.md` following the existing adapter pattern -2. Add `mimo` to the `supported_backends` list in `config.yaml` -3. 
Update `scripts/call_mimo.sh` (new file) for subprocess invocation -4. Update the Coordinator to route `analyst` / `hermes` / `verifier` calls to the MiMo adapter -5. Add MiMo API key to local `.env` (never committed) -6. Update this document and the README to reflect the integration - -## Eligibility Note - -This skeleton is being submitted for the **MiMo Orbit** program to support the development of the planned MiMo-V2.5-Prointegration described above. The integration does not exist yet — this application is for the capability development grant. diff --git a/docs/model-integration-roadmap.md b/docs/model-integration-roadmap.md new file mode 100644 index 0000000..37b856a --- /dev/null +++ b/docs/model-integration-roadmap.md @@ -0,0 +1,75 @@ +# Future Model Integration Roadmap + +## Overview + +Multi Agent Lab currently uses a rule-based Router, heuristic Guard checks, and shell/Python/Claude Code backends for execution. The adapter layer (`adapters/`) provides a clean separation between agent roles and backend implementations. + +The natural next step for this skeleton is to replace the rule-based components with a more capable reasoning model — one that can handle long task histories, provide nuanced safety assessments, and verify execution results semantically. + +## Current Backend Stack + +| Adapter | Role | Status | +|---|---|---| +| `shell_adapter.md` | Direct shell execution | Working | +| `openclaw_adapter.md` | OpenClaw agent dispatch | Working | +| `hermes_adapter.md` | Hermes advisory review | Working | +| `claude_code_adapter.md` | Claude Code session dispatch | Working | + +## Where a Capable Model Fits + +The most impactful integration points for a stronger model backend are: + +### Analyst — Multi-Step Planning + +The Analyst generates sub-step plans from Scout's findings. 
With a longer context window, the Analyst can: + +- Maintain plan coherence across dozens of sub-steps +- Reference relevant past lessons from memory without truncation +- Generate more robust tool-call sequences for complex tasks + +### Verifier — Semantic Result Checking + +The Verifier currently checks output with simple diffs and exit codes. A reasoning model can: + +- Detect partial success (e.g., 9 of 10 files created correctly) +- Identify semantic mismatches between the goal and actual output +- Provide a natural-language diagnosis when verification fails + +### Hermes-Style Review — Risk Reasoning + +Guard currently uses a pattern deny-list for safety. A model-based reviewer could: + +- Understand nuanced risk trade-offs in execution requests +- Detect hidden risky side effects in compound commands +- Explain in plain language why a request was flagged + +### Memory — Long-Term Summarization + +Over time, the memory layer accumulates structured records. A model can: + +- Summarize a week's worth of task records into reusable lessons +- Extract cross-task patterns from decision logs +- Maintain a prioritized, compact lesson context for new tasks + +## Candidate Models + +A candidate backend is any model that offers: + +- Extended context windows (64K+ tokens preferred) +- Strong instruction-following and tool-use capability +- Reasoning output in structured, machine-parseable format +- Reasonable latency for subprocess-style invocation + +**MiMo-V2.5-Pro** is a strong candidate for this role given its long-context reasoning focus and potential for planning and review tasks. + +## Integration Steps (When Ready) + +1. Add `adapters/mimo_adapter.md` following the existing adapter pattern +2. Add `mimo` to `supported_backends` in `config.yaml` +3. Create `scripts/call_mimo.sh` for subprocess invocation +4. Update the Coordinator to route `analyst`, `hermes`, and `verifier` calls to the new adapter +5.
Configure the API key in a local `.env` file (never committed) + +## Current State + +The integrations described above are **planned**, not implemented. The current system uses rule-based routing and heuristic checks. The adapter layer is designed to make this upgrade straightforward when a model backend is available. diff --git a/tests/smoke_public.sh b/tests/smoke_public.sh index f1f3220..75fee64 100755 --- a/tests/smoke_public.sh +++ b/tests/smoke_public.sh @@ -74,7 +74,7 @@ for f in \ docs/architecture.md \ docs/workflows.md \ docs/security.md \ - docs/mimo-orbit.md; do + docs/model-integration-roadmap.md; do if [[ -f "$f" ]]; then echo " OK: $f" else @@ -174,7 +174,7 @@ fi # Check MiMo doc clearly marks integration as planned echo echo "[11] Checking MiMo doc status..." -if grep -qi "not.*integrat\|planned\|target\|future\|aspirational" docs/mimo-orbit.md 2>/dev/null; then +if grep -qi "not.*integrat\|planned\|target\|future\|aspirational" docs/model-integration-roadmap.md 2>/dev/null; then echo " OK: MiMo integration is clearly marked as planned" else echo " WARNING: MiMo integration status unclear in docs/mimo-orbit.md" From e94d9513f8ea94f8e50b6a2199315c996c19ab93 Mon Sep 17 00:00:00 2001 From: le Date: Wed, 29 Apr 2026 16:04:55 +0800 Subject: [PATCH 4/6] add public multi_agent/ skeleton: cleaned paths, templates, enhanced smoke test - Add multi_agent/ as public-clean skeleton (paths normalized to multi_agent/) - Add templates/env.example and templates/.multi-agent/config.yaml.example - Rewrite tests/smoke_public.sh with 12 checks (incl. 
secret scan) - Update .gitignore to cover multi_agent/logs/, workspace/, .env, and private dirs - Rewrite README.md with new structure, safety model, and setup instructions - Path references updated across docs/, examples/, multi_agent/ - No real API keys, tokens, secrets, or private paths committed --- .gitignore | 104 ++- README.md | 233 +++---- docs/architecture.md | 2 +- docs/security.md | 10 +- docs/workflows.md | 4 +- examples/architecture-review.md | 4 +- examples/info-workflow.md | 4 +- examples/modify-workflow.md | 22 +- multi_agent/README.md | 100 +++ multi_agent/adapters/claude_code_adapter.md | 28 + multi_agent/adapters/hermes_adapter.md | 36 ++ multi_agent/adapters/openclaw_adapter.md | 24 + multi_agent/adapters/shell_adapter.md | 27 + multi_agent/agents/analyst.md | 35 + multi_agent/agents/executor.md | 41 ++ multi_agent/agents/guard.md | 33 + multi_agent/agents/hermes_reviewer.md | 38 ++ multi_agent/agents/main.md | 34 + multi_agent/agents/memory_manager.md | 29 + multi_agent/agents/router.md | 34 + multi_agent/agents/scout.md | 41 ++ multi_agent/agents/verifier.md | 30 + multi_agent/config.yaml | 44 ++ multi_agent/memory/templates/daily.md | 12 + multi_agent/memory/templates/decision.md | 10 + multi_agent/memory/templates/failure.md | 10 + multi_agent/memory/templates/lesson.md | 8 + multi_agent/scripts/call_claude_code.sh | 179 ++++++ multi_agent/scripts/call_hermes.sh | 123 ++++ multi_agent/scripts/call_openclaw.sh | 188 ++++++ multi_agent/scripts/detect_tools.sh | 56 ++ multi_agent/scripts/guard_check.py | 144 +++++ multi_agent/scripts/run_workflow.sh | 668 ++++++++++++++++++++ multi_agent/scripts/smoke_test.sh | 73 +++ multi_agent/scripts/verify.py | 139 ++++ multi_agent/scripts/write_memory.py | 136 ++++ multi_agent/workflows/analysis.yaml | 15 + multi_agent/workflows/architecture.yaml | 19 + multi_agent/workflows/debug.yaml | 17 + multi_agent/workflows/info.yaml | 13 + multi_agent/workflows/modify.yaml | 17 + multi_agent/workflows/risky.yaml | 
18 + templates/.multi-agent/config.yaml.example | 94 +++ templates/env.example | 42 ++ tests/smoke_public.sh | 101 +-- 45 files changed, 2818 insertions(+), 221 deletions(-) create mode 100644 multi_agent/README.md create mode 100644 multi_agent/adapters/claude_code_adapter.md create mode 100644 multi_agent/adapters/hermes_adapter.md create mode 100644 multi_agent/adapters/openclaw_adapter.md create mode 100644 multi_agent/adapters/shell_adapter.md create mode 100644 multi_agent/agents/analyst.md create mode 100644 multi_agent/agents/executor.md create mode 100644 multi_agent/agents/guard.md create mode 100644 multi_agent/agents/hermes_reviewer.md create mode 100644 multi_agent/agents/main.md create mode 100644 multi_agent/agents/memory_manager.md create mode 100644 multi_agent/agents/router.md create mode 100644 multi_agent/agents/scout.md create mode 100644 multi_agent/agents/verifier.md create mode 100644 multi_agent/config.yaml create mode 100644 multi_agent/memory/templates/daily.md create mode 100644 multi_agent/memory/templates/decision.md create mode 100644 multi_agent/memory/templates/failure.md create mode 100644 multi_agent/memory/templates/lesson.md create mode 100755 multi_agent/scripts/call_claude_code.sh create mode 100755 multi_agent/scripts/call_hermes.sh create mode 100755 multi_agent/scripts/call_openclaw.sh create mode 100755 multi_agent/scripts/detect_tools.sh create mode 100755 multi_agent/scripts/guard_check.py create mode 100755 multi_agent/scripts/run_workflow.sh create mode 100755 multi_agent/scripts/smoke_test.sh create mode 100755 multi_agent/scripts/verify.py create mode 100755 multi_agent/scripts/write_memory.py create mode 100644 multi_agent/workflows/analysis.yaml create mode 100644 multi_agent/workflows/architecture.yaml create mode 100644 multi_agent/workflows/debug.yaml create mode 100644 multi_agent/workflows/info.yaml create mode 100644 multi_agent/workflows/modify.yaml create mode 100644 multi_agent/workflows/risky.yaml create 
mode 100644 templates/.multi-agent/config.yaml.example create mode 100644 templates/env.example diff --git a/.gitignore b/.gitignore index bfdddfc..9b61174 100644 --- a/.gitignore +++ b/.gitignore @@ -1,31 +1,81 @@ -# secrets +# ============================================================================= +# Multi Agent Lab — .gitignore +# ============================================================================= +# These paths MUST NOT be committed — they contain runtime data, secrets, +# or machine-specific paths. If you see any of these in a commit, remove them. +# ============================================================================= + +# ----------------------------------------------------------------------------- +# Runtime Logs & Memory (most critical) +# ----------------------------------------------------------------------------- +**/logs/ +**/memory/daily/ +**/memory/failures/ +**/memory/decisions/ +**/workspace/ +**/*.log + +# ----------------------------------------------------------------------------- +# Environment & Secrets +# ----------------------------------------------------------------------------- .env -.env.* -*.key +.env.local +.env.*.local *.pem -*.token -credentials* -secrets* - -# private local dirs -.ssh/ -.aws/ -.azure/ -.claude/ -.codex/ -.openclaw/ -.config/ - -# runtime generated files -.multi-agent/logs/ -.multi-agent/memory/daily/ -.multi-agent/memory/failures/ -.multi-agent/memory/decisions/ -.multi-agent/workspace/*.txt - -# cache -.cache/ -node_modules/ +*.key +*.crt +credentials/ +secrets/ + +# ----------------------------------------------------------------------------- +# Private Runtime Directories +# ----------------------------------------------------------------------------- +# These are home-directory configs and should never be copied into a repo +**/.openclaw/ +**/.ssh/ +**/.aws/ +**/.claude/ +**/.codex/ +**/.config/ +**/.hermes/ + +# 
----------------------------------------------------------------------------- +# Python +# ----------------------------------------------------------------------------- __pycache__/ -*.log +*.py[cod] +*.pyo +*.egg-info/ +dist/ +build/ +.eggs/ + +# ----------------------------------------------------------------------------- +# OS +# ----------------------------------------------------------------------------- .DS_Store +Thumbs.db +*.swp +*.swo +*~ + +# ----------------------------------------------------------------------------- +# Editor +# ----------------------------------------------------------------------------- +.vscode/ +.idea/ +*.sublime-* + +# ----------------------------------------------------------------------------- +# Test / CI artifacts +# ----------------------------------------------------------------------------- +coverage/ +htmlcov/ +.pytest_cache/ + +# ----------------------------------------------------------------------------- +# Node (if any) +# ----------------------------------------------------------------------------- +node_modules/ +package-lock.json +yarn.lock diff --git a/README.md b/README.md index 582d56c..d40804c 100644 --- a/README.md +++ b/README.md @@ -2,163 +2,136 @@ [![Public Smoke Test](https://github.com/qian-le/multi-agent-lab/actions/workflows/public-smoke.yml/badge.svg)](https://github.com/qian-le/multi-agent-lab/actions/workflows/public-smoke.yml) -A local multi-agent workflow skeleton built around OpenClaw, Hermes, and Claude Code. - -## Project Scope - -This repository contains a **workflow orchestration framework** for multi-agent collaboration on a single machine. The system routes tasks through specialized agents — Router, Scout, Analyst, Hermes Reviewer, Guard, Executor, Verifier, and Memory Manager — to plan, review, and execute work safely within a sandboxed workspace. - -It is designed as a local development and experimentation environment, not a hosted or production system. 
- -## Core Features - -- **Router** — classifies incoming goals into workflow types -- **Scout** — read-only inspection of workspace and memory -- **Analyst** — plans approach and sub-steps -- **Hermes Reviewer** — fallback review for complex or ambiguous tasks -- **Guard** — mandatory safety gate before any execution -- **Executor** — runs actions via shell, Python, or Claude Code -- **Verifier** — checks executor output matches intent -- **Memory Manager** — reads/writes structured memory templates -- **Safe workspace boundary** — all writes stay under `.multi-agent/workspace/` - -## System Architecture - -```mermaid -flowchart TD - U[User Goal] --> M[Main Coordinator] - M --> R[Router
classify task type] - R --> S[Scout
read-only inspection] - S --> A[Analyst
planning] - A --> H[Hermes Reviewer
complex tasks only] - H --> G[Guard
safety gate] - A --> G - G -->|allow| E[Executor
shell / python / claude_code] - G -->|deny| STOP[Pipeline Stopped] - E --> V[Verifier
check result] - V -->|pass| MEM[Memory Manager
record to templates] - V -->|fail| FAIL[Reported as Failure] -``` +A local multi-agent workflow skeleton demonstrating how to route tasks through specialized agents — Router, Scout, Analyst, Hermes Reviewer, Guard, Executor, Verifier, and Memory Manager — with a mandatory safety gate before any execution. + +This is a **learning and experimentation skeleton**, not a production system. It runs entirely on a single machine and is designed to teach agent role separation, safety-first execution, and memory management. -Text overview: +## Project Structure ``` -User Goal - └─> Main Coordinator - └─> Router (classify task type) - ├─> Scout (inspect) - ├─> Analyst (plan) - ├─> Hermes Reviewer (complex tasks) - ├─> Guard (safety gate) - ├─> Executor (run) [only if Guard approves] - ├─> Verifier (check result) - └─> Memory Manager (record) +multi-agent-lab/ +├── multi_agent/ # Core skeleton code +│ ├── agents/ # Agent role definitions +│ ├── adapters/ # Backend adapter layer +│ ├── workflows/ # Workflow type definitions +│ ├── scripts/ # Shell/Python runner scripts +│ ├── memory/templates/ # Structured memory templates +│ └── config.yaml # Workflow routing configuration +├── docs/ # Architecture, workflow, and integration docs +├── examples/ # Sanitized example runs (no real logs) +├── tests/ # Public smoke test +├── templates/ # Example config templates +│ └── .multi-agent/ # Example local config (no real secrets) +├── .github/workflows/ +└── README.md ``` -## Supported Workflows - -| Workflow | Trigger | Stops at | -|---|---|---| -| `info` | file inspection, search, read | never executes | -| `analysis` | evaluation, comparison, planning | never executes | -| `modify` | create/update files in workspace | Guard or Verifier | -| `debug` | diagnose failures | Guard blocks risky ops | -| `architecture` | design review, dry-run | always a plan only | -| `risky` | sudo, system paths, rm -rf | Guard blocks by default | - ## Quick Start ```bash -# Detect available runtimes -bash 
.multi-agent/scripts/detect_tools.sh +# Check that all scripts have valid syntax +bash tests/smoke_public.sh -# Dry-run an info workflow -bash .multi-agent/scripts/run_workflow.sh --type info --goal "list all agent files" +# Detect available backends +bash multi_agent/scripts/detect_tools.sh -# Dry-run an architecture review -bash .multi-agent/scripts/run_workflow.sh --type architecture --goal "review current agent roles" +# Dry-run an info workflow (read-only, never executes) +bash multi_agent/scripts/run_workflow.sh --type info --goal "list all agent files" -# Run the public smoke test -bash tests/smoke_public.sh +# Dry-run a modify workflow (stops at Guard) +bash multi_agent/scripts/run_workflow.sh --type modify --goal "create a hello world file" ``` -## Repository Layout +## Core Agent Roles -``` -multi-agent-lab/ -├── .multi-agent/ # Core skeleton -│ ├── README.md # Skeleton overview -│ ├── config.yaml # Workflow routing config -│ ├── agents/ # Agent role definitions -│ ├── adapters/ # Backend adapters (shell, openclaw, hermes, claude_code) -│ ├── workflows/ # Workflow type definitions -│ ├── scripts/ # Runner, verifier, guard check, memory writer -│ ├── memory/ -│ │ ├── templates/ # Daily, decision, failure, lesson templates -│ │ ├── project/ # Project status -│ │ └── lessons/ # Cross-task learnings -│ └── workspace/ # Sandbox for executor writes -├── docs/ # Architecture, security, workflow guides -├── examples/ # Example runs (read-only, no real logs) -├── tests/ # Public smoke test -├── README.md # This file -├── LICENSE # MIT -├── CONTRIBUTING.md -├── SECURITY.md -└── ROADMAP.md -``` +| Agent | Responsibility | +|---|---| +| **Router** | Classifies incoming goal into workflow type | +| **Scout** | Read-only inspection of workspace and memory | +| **Analyst** | Plans approach and sub-steps | +| **Hermes Reviewer** | Fallback reasoning for complex tasks | +| **Guard** | Mandatory safety gate — blocks before Executor runs | +| **Executor** | Runs actions via 
shell, Python, or Claude Code | +| **Verifier** | Checks executor output matches the original intent | +| **Memory Manager** | Reads/writes structured memory templates | ## Safety Model -The system enforces a **Guard before Executor** policy: +- **Guard before Executor**: Every execution passes through Guard first +- **Workspace-only writes**: Executor can only write to `multi_agent/workspace/` +- **No secret logging**: API keys, tokens, and credentials are never written to memory or logs +- **Deny list**: Commands like `sudo`, `rm -rf /`, recursive chmod/chown on system paths are blocked by default -- Guard evaluates all execution requests against a deny-list -- Forbidden: `sudo`, `rm -rf /`, `chmod -R`, `chown -R`, system paths (`/etc`, `/usr`, `/var`, `/opt`) -- Executor writes are **workspace-only** (`.multi-agent/workspace/`) -- No credentials, tokens, keys, or secrets are ever logged -- All memory writes go to structured templates, not raw logs +See [docs/security.md](docs/security.md) for the full threat model. -## Why This Matters +## Environment Setup -Most agent demos show a single LLM call. This skeleton shows how multiple agents with different responsibilities can collaborate — with a safety gate that actually stops destructive operations, and a verifier that checks whether the executor actually did what was asked. +This project requires **no external services** to run the skeleton. The smoke test and dry-runs work out of the box. -This is a **workflow skeleton**, not a deployed product. It is useful for studying agent role separation, testing safety boundaries, and running local automation with review steps. 
+For real agent runtime calls (OpenClaw, Hermes, Claude Code), you need to set up your own backend: -## Model Integration Roadmap +```bash +# Required environment variables (copy templates and fill in your values) +cp templates/env.example .env -## Model Integration Roadmap +# Edit .env with your actual API keys and paths +nano .env +``` + +See `templates/.multi-agent/config.yaml.example` for the full config template with field documentation. + +## What Cannot Be Committed -The adapter layer in this skeleton is designed to make backend upgrades straightforward. The natural next step is replacing the rule-based Router, heuristic Guard, and diff-based Verifier with a more capable reasoning model. +The following are automatically excluded via `.gitignore`. If you see them in a commit, something is wrong: -**MiMo-V2.5-Pro** is a strong candidate given its long-context reasoning capability for planning and review tasks. Planned integration targets: +- `multi_agent/logs/` — runtime message logs +- `multi_agent/memory/daily/` — session day logs +- `multi_agent/memory/failures/` — failure records +- `multi_agent/memory/decisions/` — decision logs +- `multi_agent/workspace/` — executor output files +- `.env` — contains real API keys and tokens +- `~/.openclaw/`, `~/.ssh/`, `~/.aws/`, `~/.claude/` — private runtime dirs +- `*.pyc`, `__pycache__/`, `*.pyo` +- Any file containing real tokens, API keys, or secrets -- **Analyst** — multi-step plans with long task history context -- **Verifier** — semantic output verification beyond diff and exit codes +## Smoke Test + +```bash +bash tests/smoke_public.sh +``` + +The smoke test checks: +1. All required files exist +2. All bash scripts have valid syntax +3. All Python scripts compile without error +4. Forbidden directories (logs, memory, workspace) are not tracked by git +5. `.gitignore` covers all critical items +6. README makes no inflated claims +7. 
Integration docs clearly mark planned items as planned + +## Backend Adapters + +The skeleton ships with four backend adapters in `multi_agent/adapters/`: + +| Adapter | Backend | Status | +|---|---|---| +| `shell_adapter.md` | Direct `bash` / `python3` | Always available | +| `openclaw_adapter.md` | OpenClaw agent dispatch | Requires OpenClaw runtime | +| `hermes_adapter.md` | Hermes advisory review | Requires Hermes CLI | +| `claude_code_adapter.md` | Claude Code session | Requires Claude Code | + +Each adapter documents how to configure the backend and what the call/response format looks like. + +## Model Integration Roadmap + +Planned integration targets for a long-context reasoning model (e.g. MiMo-V2.5-Pro): + +- **Analyst** — multi-step planning with long task history +- **Verifier** — semantic output verification beyond diff/exit-code - **Hermes-style review** — contextual risk reasoning instead of pattern deny-lists -- **Memory** — summarization of accumulated session records - -See [docs/model-integration-roadmap.md](docs/model-integration-roadmap.md) for the full roadmap. 
- -## Roadmap - -### Near-term -- Polish public documentation and examples -- Expand smoke tests for all workflow types -- Add more local verifier checks - -### Mid-term -- Integrate OpenClaw runtime agent calls (real subprocess routing) -- Improve Claude Code backend adapter -- Improve Hermes non-interactive reviewer mode -- Add richer memory retrieval (semantic vs keyword) - -### Long-term -- **MiMo-V2.5-Pro based Analyst** — long-context planner -- **MiMo-V2.5-Pro based Verifier** — semantic output verification -- **MiMo-V2.5-Pro based Hermes Reviewer** — contextual risk reasoning -- Multi-model routing based on task complexity -- Task graph execution with dependency tracking -- Optional web dashboard for workflow monitoring +- **Memory summarization** — condensing accumulated session records + +See [docs/model-integration-roadmap.md](docs/model-integration-roadmap.md) for the full plan. MiMo-V2.5-Pro integration is **planned**, not currently implemented. ## License diff --git a/docs/architecture.md b/docs/architecture.md index 8d72b9f..cb7b503 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -103,7 +103,7 @@ The memory layer is **read by Scout** at startup so future tasks have context. I The executor may only write to: ``` -.multi-agent/workspace/ +multi_agent/workspace/ ``` Files outside this boundary require Guard approval with elevated justification. The shell adapter checks all write paths before executing. diff --git a/docs/security.md b/docs/security.md index 7afe0f9..dff7235 100644 --- a/docs/security.md +++ b/docs/security.md @@ -22,7 +22,7 @@ Guard evaluates every execution request against a deny-list. 
The following are a | Recursive chmod/chown | `chmod -R 777 /` | | System directories | `/etc`, `/usr`, `/var`, `/opt`, `/root` | | Credential access | Commands that read `~/.ssh/`, `~/.aws/`, `~/.netrc` | -| Attempt to escape workspace | Any write outside `.multi-agent/workspace/` | +| Attempt to escape workspace | Any write outside `multi_agent/workspace/` | Guard returns `DENIED` for these cases. The executor never runs. @@ -31,7 +31,7 @@ Guard returns `DENIED` for these cases. The executor never runs. The executor may only write to: ``` -.multi-agent/workspace/ +multi_agent/workspace/ ``` Write operations that target paths outside this boundary are blocked by the shell adapter **before** Guard is even consulted. This is enforced as a path check in the adapter layer. @@ -81,7 +81,7 @@ cd /path/to/multi-agent-lab bash tests/smoke_public.sh # 3. Run a workflow with a sandboxed goal -bash .multi-agent/scripts/run_workflow.sh \ +bash multi_agent/scripts/run_workflow.sh \ --type info \ --goal "list all markdown files in this project" @@ -89,8 +89,8 @@ bash .multi-agent/scripts/run_workflow.sh \ git status --short # 5. If logs/ or memory/daily/ appeared, clean them up -rm -rf .multi-agent/logs .multi-agent/memory/daily/* -git checkout -- .multi-agent/logs .multi-agent/memory/daily +rm -rf multi_agent/logs multi_agent/memory/daily/* +git checkout -- multi_agent/logs multi_agent/memory/daily ``` ## Safe Contributor Checklist diff --git a/docs/workflows.md b/docs/workflows.md index 906f03c..cd66a11 100644 --- a/docs/workflows.md +++ b/docs/workflows.md @@ -38,7 +38,7 @@ Executor never runs. Analyst produces a plan, Hermes reviews it. ## modify -**Trigger:** Task creates or updates files inside `.multi-agent/workspace/`. +**Trigger:** Task creates or updates files inside `multi_agent/workspace/`. **Pipeline:** ``` @@ -90,7 +90,7 @@ No executor. No side effects. A pure review pipeline. 
## risky -**Trigger:** Task touches system paths, uses sudo, or modifies outside `.multi-agent/workspace/`. +**Trigger:** Task touches system paths, uses sudo, or modifies outside `multi_agent/workspace/`. **Pipeline:** ``` diff --git a/examples/architecture-review.md b/examples/architecture-review.md index ba5e249..ad7a4f9 100644 --- a/examples/architecture-review.md +++ b/examples/architecture-review.md @@ -17,9 +17,9 @@ This example demonstrates an `architecture` workflow — a dry-run review of the ### 2. Scout.inspect() **Actions:** -- Reads all agent definition files in `.multi-agent/agents/` +- Reads all agent definition files in `multi_agent/agents/` - Reads `docs/architecture.md` for system overview -- Reads `.multi-agent/memory/lessons/` for past architecture decisions +- Reads `multi_agent/memory/lessons/` for past architecture decisions **Findings:** ``` diff --git a/examples/info-workflow.md b/examples/info-workflow.md index af02bb6..60f9693 100644 --- a/examples/info-workflow.md +++ b/examples/info-workflow.md @@ -17,8 +17,8 @@ This example demonstrates a read-only `info` workflow. No files are created or m ### 2. Scout.inspect() **Actions:** -- Reads `.multi-agent/agents/` directory contents -- Reads `.multi-agent/memory/project/status.md` for recent context +- Reads `multi_agent/agents/` directory contents +- Reads `multi_agent/memory/project/status.md` for recent context - No filesystem modification **Findings:** diff --git a/examples/modify-workflow.md b/examples/modify-workflow.md index 07607a8..4fddcb9 100644 --- a/examples/modify-workflow.md +++ b/examples/modify-workflow.md @@ -17,7 +17,7 @@ This example demonstrates a `modify` workflow that safely creates a file inside ### 2. 
Scout.inspect() **Actions:** -- Confirms `.multi-agent/workspace/` directory exists and is writable +- Confirms `multi_agent/workspace/` directory exists and is writable - No existing `demo.txt` (first creation) **Findings:** @@ -36,7 +36,7 @@ workspace/ steps: - id: "1" kind: "action" - task: "Write 'hello agent' to .multi-agent/workspace/demo.txt" + task: "Write 'hello agent' to multi_agent/workspace/demo.txt" ``` ### 4. Guard.evaluate() @@ -45,7 +45,7 @@ steps: ```json { "action": "write_file", - "path": ".multi-agent/workspace/demo.txt", + "path": "multi_agent/workspace/demo.txt", "content": "hello agent" } ``` @@ -60,12 +60,12 @@ steps: **Command executed:** ```bash -echo "hello agent" > .multi-agent/workspace/demo.txt +echo "hello agent" > multi_agent/workspace/demo.txt ``` **Output:** ``` -File created: .multi-agent/workspace/demo.txt +File created: multi_agent/workspace/demo.txt Bytes written: 12 ``` @@ -75,12 +75,12 @@ Bytes written: 12 ```json { "goal": "Create demo.txt with content 'hello agent'", - "executor_output": "File created: .multi-agent/workspace/demo.txt\nBytes written: 12" + "executor_output": "File created: multi_agent/workspace/demo.txt\nBytes written: 12" } ``` **Actions:** -- Reads `.multi-agent/workspace/demo.txt` +- Reads `multi_agent/workspace/demo.txt` - Compares content to expected value **Output:** `VERIFIED` @@ -96,14 +96,14 @@ Bytes written: 12 After running: ```bash -bash .multi-agent/scripts/run_workflow.sh \ +bash multi_agent/scripts/run_workflow.sh \ --type modify \ --goal "Create demo.txt in workspace with content hello agent" ``` **Workspace result:** ```bash -$ cat .multi-agent/workspace/demo.txt +$ cat multi_agent/workspace/demo.txt hello agent ``` @@ -112,7 +112,7 @@ hello agent # Guard Decision — 2026-04-29 ## Action -write_file: .multi-agent/workspace/demo.txt +write_file: multi_agent/workspace/demo.txt ## Decision APPROVED @@ -125,7 +125,7 @@ Inside workspace boundary. Plain text content. No credential access. 
## Safety Notes -- The write is constrained to `.multi-agent/workspace/` +- The write is constrained to `multi_agent/workspace/` - Guard would block any write to `~/.ssh/`, `/etc/`, or system paths - The file content is verified by Verifier after creation - No secrets or tokens were involved diff --git a/multi_agent/README.md b/multi_agent/README.md new file mode 100644 index 0000000..1f5a453 --- /dev/null +++ b/multi_agent/README.md @@ -0,0 +1,100 @@ +# OpenClaw Multi-Agent OS + +这是一个放在当前项目内的本地多 Agent 协作闭环。它不替换已有 `.openclaw/`,而是在 `multi_agent/` 中提供稳定的调度、审查、执行、验证和记忆层。 + +## Agent 职责 + +- Main Coordinator: 理解目标、选择 workflow、调度子 agent、汇总结果、触发 memory。 +- Router: 分类 `info`、`analysis`、`debug`、`modify`、`architecture`、`risky`。 +- Scout: 只读侦察,只能查看文件、目录、日志和 git 状态。 +- Analyst: 只分析和制定方案,不执行、不改文件。 +- Hermes Reviewer: 高级审查、第二意见、风险识别,输出 `approve` / `revise` / `reject`。 +- Guard: 执行前安全门,只有 `decision: allow` 才能继续。 +- Executor: 按 Guard 允许范围选择 `shell`、`python` 或 `claude_code` 后端执行。 +- Verifier: 检查文件、内容、命令和日志,输出 `pass` / `fail` / `partial`。 +- Memory Manager: 写 daily、project、decisions、failures、lessons。 + +## 工作流 + +- 查询类: `main -> router -> scout -> memory -> main` +- 分析类: `main -> router -> scout -> analyst -> guard -> memory -> main` +- 普通修改类: `main -> router -> scout -> analyst -> guard -> executor -> verifier -> memory -> main` +- Debug 类: `main -> router -> scout -> analyst -> guard -> executor -> verifier -> memory -> main` +- 架构/复杂重构类: `main -> router -> scout -> analyst -> hermes -> analyst_revision -> guard -> executor -> verifier -> memory -> main` +- 高风险类: `main -> router -> scout -> analyst -> hermes -> guard -> ask_user -> memory -> main` + +## Hermes 接入 + +`scripts/call_hermes.sh` 会检测 `hermes` 命令。存在时尝试把 Analyst Plan 交给 Hermes;不可用或无输出时生成 fallback review,不让流程崩溃。Hermes 不直接修改文件,只输出审查意见。 + +## Claude Code 接入 + +`scripts/call_claude_code.sh` 会检测 `claude` 命令。复杂任务选择 `--backend claude_code` 时,它会把用户目标、Analyst 方案、Hermes 审查、Guard 范围、禁止动作和验证方式打包成边界清晰的 prompt。`claude` 不存在时会优雅失败并写日志。 
+ +## Guard 安全规则 + +Guard 会拦截递归强删除、`sudo`、递归权限修改、系统目录修改、凭据相关操作、安装命令和范围不清晰的计划。`decision: ask_user` 或 `decision: deny` 都会停止执行。 + +## Verifier 验证规则 + +Verifier 支持检查文件存在、文件内容、命令退出码和日志存在。没有证据不会输出 `pass`。每次 Executor 运行后必须进入 Verifier。 + +## Memory + +- `memory/daily/`: 每次任务追加当天记录。 +- `memory/project/status.md`: 当前项目状态。 +- `memory/decisions/`: `architecture` 任务的决策记录。 +- `memory/failures/`: 验证失败的任务记录。 +- `memory/lessons/`: 可复用经验。 +- `memory/templates/`: daily、decision、failure、lesson 模板。 + +Memory 写入会做基础敏感值脱敏,不记录 API key、token、cookie、password、secret 等值。 + +## 常用命令 + +```bash +bash multi_agent/scripts/detect_tools.sh +``` + +```bash +bash multi_agent/scripts/smoke_test.sh +``` + +```bash +bash multi_agent/scripts/run_workflow.sh --type info --goal "inspect project structure" +``` + +```bash +bash multi_agent/scripts/run_workflow.sh --type modify --goal "create workspace/test.txt with hello multi-agent" --backend shell +``` + +```bash +bash multi_agent/scripts/run_workflow.sh --type architecture --goal "review multi-agent workflow design" --backend claude_code +``` + +## 新增 Agent + +1. 在 `agents/` 下新增 `.md`。 +2. 明确职责、输入、输出格式、禁止动作。 +3. 在相关 workflow 的 `stages` 中加入该 agent。 +4. 在 `run_workflow.sh` 或外部 OpenClaw 调度中增加对应阶段。 +5. 增加验证和 memory 字段,确保可排查。 + +## 新增 Workflow + +1. 在 `workflows/` 下新增 `.yaml`。 +2. 定义 stages、是否需要 Guard、Verifier、Hermes、用户确认。 +3. 更新 `config.yaml` 的 `workflows`。 +4. 扩展 Router 分类规则。 +5. 
增加 smoke 或专项验证命令。 + +## 故障排查 + +- 工具不存在: 运行 `bash multi_agent/scripts/detect_tools.sh`。 +- Guard 停止: 查看 `multi_agent/logs/messages/*_guard.md`。 +- Hermes 不可用: 查看 `multi_agent/logs/runs/*_hermes.log`,fallback review 会写入 messages。 +- Claude Code 不可用: 查看 `multi_agent/logs/runs/*_claude_code.log`。 +- 验证失败: 查看 `multi_agent/logs/messages/*_verifier.md` 和 `memory/failures/`。 +- 没有 daily memory: 检查 `write_memory.py` 是否可执行,并查看 run log。 +- `workspace/test.txt` 缺失: 运行 smoke test,确认 Guard 是否 allow、Executor 是否执行、Verifier 是否 pass。 + diff --git a/multi_agent/adapters/claude_code_adapter.md b/multi_agent/adapters/claude_code_adapter.md new file mode 100644 index 0000000..1f06547 --- /dev/null +++ b/multi_agent/adapters/claude_code_adapter.md @@ -0,0 +1,28 @@ +# Claude Code Adapter + +Claude Code 是复杂代码任务的强执行后端。 + +## 适用场景 + +- 多文件代码修改。 +- 复杂重构。 +- 需要理解项目上下文的 debug。 +- 架构方案已经通过 Hermes 和 Guard 后的受限执行。 + +## 调用 prompt 必须包含 + +- 用户目标。 +- Analyst 方案。 +- Hermes 审查意见,如果有。 +- Guard 允许范围。 +- 禁止动作。 +- 验证方式。 + +## 执行边界 + +- 只能执行 Guard `allow` 的计划。 +- 不能扩大范围。 +- 不能删除文件。 +- 不能跳过 Verifier。 +- 如果 `claude` 命令不存在,必须优雅失败并写日志。 + diff --git a/multi_agent/adapters/hermes_adapter.md b/multi_agent/adapters/hermes_adapter.md new file mode 100644 index 0000000..6af2de5 --- /dev/null +++ b/multi_agent/adapters/hermes_adapter.md @@ -0,0 +1,36 @@ +# Hermes Adapter + +Hermes 是高级审查和第二意见系统。 + +## 角色 + +- 高级架构审查。 +- 第二意见。 +- 风险识别。 +- 判断是否过度设计。 +- 输出 `approve` / `revise` / `reject`。 + +## 输入 + +- 用户目标。 +- Scout Report。 +- Analyst Plan。 +- 相关约束。 + +## 输出 + +```markdown +## Hermes Review + +- 总体评价: +- 方案优点: +- 潜在问题: +- 遗漏风险: +- 优化建议: +- 最终建议: approve | revise | reject +``` + +## Fallback + +如果 `hermes` 命令不可用,`scripts/call_hermes.sh` 生成审查模板并标记为 fallback,不让流程因为工具缺失而崩溃。 + diff --git a/multi_agent/adapters/openclaw_adapter.md b/multi_agent/adapters/openclaw_adapter.md new file mode 100644 index 0000000..7c947c7 --- /dev/null +++ b/multi_agent/adapters/openclaw_adapter.md @@ -0,0 +1,24 @@ +# OpenClaw Adapter + +OpenClaw 
是总调度入口。`multi_agent/` 不破坏已有 `.openclaw/`,只通过配置引用它。 + +## 角色 + +- 接收用户目标。 +- 选择 Router 和 workflow。 +- 调用子 agent。 +- 汇总状态。 +- 触发 Guard、Verifier、Memory。 + +## 集成边界 + +- `.openclaw/` 保持为现有运行时目录。 +- `multi_agent/config.yaml` 中的 `project.openclaw_config` 指向 `.openclaw`。 +- `scripts/call_openclaw.sh` 只做受限调用或生成 fallback 调度模板。 + +## 禁止 + +- 自动修改 `.openclaw/credentials`。 +- 打印 OpenClaw 内部凭据。 +- 绕过 Guard 直接执行。 + diff --git a/multi_agent/adapters/shell_adapter.md b/multi_agent/adapters/shell_adapter.md new file mode 100644 index 0000000..8f61c90 --- /dev/null +++ b/multi_agent/adapters/shell_adapter.md @@ -0,0 +1,27 @@ +# Shell Adapter + +Shell Adapter 只处理低风险、边界明确的小任务。 + +## 适用场景 + +- 创建 `multi_agent/workspace` 下的测试文件。 +- 只读检查。 +- 运行验证命令。 +- 简单、可审查、可回滚的本地操作。 + +## 禁止 + +- `rm -rf` +- `sudo` +- `chmod -R` +- `chown -R` +- 修改系统目录。 +- 打印或复制凭据。 +- 执行 Guard 未允许的命令。 + +## 原则 + +- 默认不解释用户 goal 为任意 shell。 +- 只执行脚本内部明确支持的安全动作。 +- 所有动作写入 run log。 + diff --git a/multi_agent/agents/analyst.md b/multi_agent/agents/analyst.md new file mode 100644 index 0000000..815cfde --- /dev/null +++ b/multi_agent/agents/analyst.md @@ -0,0 +1,35 @@ +# Analyst + +Analyst 只负责分析和制定方案,不能执行命令,不能修改文件。 + +## 输入 + +- 用户目标。 +- Router 分类。 +- Scout Report。 +- 现有约束和安全策略。 + +## 必须输出 + +```markdown +## Analyst Plan + +- 问题判断: +- 根因分析: +- 推荐方案: +- 备选方案: +- 需要修改的文件: +- 执行步骤: +- 风险等级: +- 验证方法: +- 是否需要 Hermes 审查: +``` + +## 规则 + +- 方案必须限定文件范围。 +- 执行步骤必须可被 Guard 审查。 +- 风险不清晰时标记为 `medium` 或 `high`。 +- 架构、跨多文件、复杂重构必须标记需要 Hermes 审查。 +- 不允许把模糊大任务交给 Executor。 + diff --git a/multi_agent/agents/executor.md b/multi_agent/agents/executor.md new file mode 100644 index 0000000..0de0d8b --- /dev/null +++ b/multi_agent/agents/executor.md @@ -0,0 +1,41 @@ +# Executor + +Executor 只执行被 Guard 明确允许的任务。 + +## 职责 + +- 读取 Guard `allowed_scope`。 +- 根据任务复杂度选择 `shell`、`python` 或 `claude_code` 后端。 +- 执行最小必要改动。 +- 记录日志。 +- 把结果交给 Verifier。 + +## 禁止 + +- 扩大范围。 +- 执行 Guard 未允许的操作。 +- 删除文件。 +- 跳过 Verifier。 +- 自称验证成功。 +- 打印或记录密钥。 + +## 必须输出 + 
+```markdown +## Executor Result + +- 执行目标: +- 执行步骤: +- 修改文件: +- 运行命令: +- 成功项: +- 失败项: +- 需要 Verifier 检查的内容: +``` + +## 后端选择 + +- 小任务使用 `shell`。 +- 本地脚本和结构化文本处理使用 `python`。 +- 复杂代码任务使用 `claude_code`,但必须传入清晰边界和 Guard 允许范围。 + diff --git a/multi_agent/agents/guard.md b/multi_agent/agents/guard.md new file mode 100644 index 0000000..e2aff52 --- /dev/null +++ b/multi_agent/agents/guard.md @@ -0,0 +1,33 @@ +# Guard + +Guard 是执行前最后一道门。没有 Guard 的 `decision: allow`,Executor 不得执行。 + +## 固定输出格式 + +```text +[Guard Decision] +decision: allow / deny / ask_user +risk_level: low / medium / high +reason: +allowed_scope: +forbidden_actions: +``` + +## 规则 + +- 出现删除、批量覆盖、权限修改、系统目录、密钥操作,必须 `ask_user` 或 `deny`。 +- 执行范围不清晰,必须 `ask_user`。 +- Hermes `reject`,必须 `deny`。 +- Hermes `revise` 但 Analyst 未修订,必须 `deny`。 +- 只有明确低风险且范围清晰,才能 `allow`。 +- Guard 不能执行任务,只能裁决。 + +## 永久禁止动作 + +- `rm -rf` +- `chmod -R` +- `chown -R` +- `sudo` +- 修改 `/usr`、`/bin`、`/etc`、`/var`、`/boot`、`/dev`、`/proc`、`/sys`、`/root` +- 打印、复制、持久化 API key、token、cookie、password、secret、private key + diff --git a/multi_agent/agents/hermes_reviewer.md b/multi_agent/agents/hermes_reviewer.md new file mode 100644 index 0000000..1813f78 --- /dev/null +++ b/multi_agent/agents/hermes_reviewer.md @@ -0,0 +1,38 @@ +# Hermes Reviewer + +Hermes Reviewer 是高级审查者,负责对 Analyst 方案给出第二意见。 + +## 职责 + +- 审查 Analyst 方案。 +- 找漏洞。 +- 找安全风险。 +- 判断是否过度设计。 +- 识别遗漏风险。 +- 给出 `approve` / `revise` / `reject`。 + +## 禁止 + +- 直接修改文件。 +- 直接执行命令。 +- 扩大任务范围。 + +## 必须输出 + +```markdown +## Hermes Review + +- 总体评价: +- 方案优点: +- 潜在问题: +- 遗漏风险: +- 优化建议: +- 最终建议: approve | revise | reject +``` + +## 判定规则 + +- 方案范围清晰、风险可控、验证充分,输出 `approve`。 +- 方案方向可行但范围、验证或安全边界不足,输出 `revise`。 +- 方案存在明显安全问题、会破坏现有系统或缺少关键上下文,输出 `reject`。 + diff --git a/multi_agent/agents/main.md b/multi_agent/agents/main.md new file mode 100644 index 0000000..4a52120 --- /dev/null +++ b/multi_agent/agents/main.md @@ -0,0 +1,34 @@ +# Main Coordinator + +Main 是 OpenClaw Multi-Agent OS 的总调度器,不直接执行复杂任务。 + +## 职责 + 
+- 理解用户需求。 +- 判断任务类型。 +- 选择工作流。 +- 分派给子 agent。 +- 汇总 Scout、Analyst、Hermes、Guard、Executor、Verifier 的结果。 +- 决定是否继续。 +- 触发 Memory Manager 记录。 + +## 原则 + +- 能调度就不亲自执行。 +- 能让 Scout 看就不让 Analyst 猜。 +- 能让 Guard 审就不直接改。 +- 能让 Verifier 验就不自称成功。 +- 高风险任务必须停在 `ask_user`。 +- 每次任务结束必须写入 daily memory。 + +## 标准流程 + +1. 交给 Router 判断 `task_type`、复杂度和风险。 +2. 根据 workflow 调用 Scout 做只读侦察。 +3. 需要方案时交给 Analyst。 +4. 复杂架构、跨多文件、高风险任务交给 Hermes Reviewer。 +5. 执行前必须经过 Guard。 +6. Guard 输出 `decision: allow` 后才允许 Executor 执行。 +7. Executor 完成后必须交给 Verifier。 +8. Memory Manager 记录任务结果、失败、决策和经验。 + diff --git a/multi_agent/agents/memory_manager.md b/multi_agent/agents/memory_manager.md new file mode 100644 index 0000000..bfd132c --- /dev/null +++ b/multi_agent/agents/memory_manager.md @@ -0,0 +1,29 @@ +# Memory Manager + +Memory Manager 负责沉淀任务结果和可复用经验。 + +## 职责 + +- 写 daily memory。 +- 更新 project status。 +- 写 architecture decisions。 +- 写 failures。 +- 写 lessons。 +- 不记录密钥。 +- 不记录隐私敏感内容。 + +## 目录 + +- `memory/daily/`: 每日任务流水。 +- `memory/project/`: 项目当前状态。 +- `memory/decisions/`: 架构和设计决策。 +- `memory/failures/`: 失败任务和原因。 +- `memory/lessons/`: 可复用经验。 + +## 规则 + +- 每次任务结束都必须写入 daily memory。 +- 验证失败必须写入 failures。 +- `architecture` 任务必须写入 decisions。 +- 记录文件路径、命令摘要和验证结果,但不能记录凭据内容。 + diff --git a/multi_agent/agents/router.md b/multi_agent/agents/router.md new file mode 100644 index 0000000..9d7f8c2 --- /dev/null +++ b/multi_agent/agents/router.md @@ -0,0 +1,34 @@ +# Router + +Router 负责把用户目标分类成明确工作流。 + +## 任务类型 + +- `info`: 查询、列目录、看文件、检查状态。 +- `analysis`: 分析问题但不修改。 +- `debug`: 分析并修复 bug。 +- `modify`: 普通修改。 +- `architecture`: 架构设计、复杂重构、多文件改动。 +- `risky`: 删除、权限、系统配置、密钥、批量覆盖。 + +## 输出格式 + +```yaml +task_type: info | analysis | debug | modify | architecture | risky +complexity: low | medium | high +risk_level: low | medium | high +workflow: multi_agent/workflows/<task_type>.yaml +need_hermes: yes | no +need_user_confirm: yes | no +reason: +``` + +## 判定规则 + +- 包含删除、权限、系统目录、密钥、批量覆盖,归为 `risky`。 +- 包含架构、重构、跨多个文件、迁移,归为 `architecture`。
+- 明确只要看信息,归为 `info`。 +- 只要分析不要修改,归为 `analysis`。 +- 修 bug 且需要执行修复,归为 `debug`。 +- 低风险文件创建或小范围编辑,归为 `modify`。 + diff --git a/multi_agent/agents/scout.md b/multi_agent/agents/scout.md new file mode 100644 index 0000000..6390c47 --- /dev/null +++ b/multi_agent/agents/scout.md @@ -0,0 +1,41 @@ +# Scout + +Scout 只读侦察,不修改任何文件。 + +## 允许 + +- `ls` +- `find` +- `grep` / `rg` +- `cat` / `sed` / `head` / `tail` +- `git status` +- 查看日志 + +## 禁止 + +- 写文件。 +- 删除文件。 +- `chmod` / `chown` / `sudo`。 +- 执行安装命令。 +- 执行破坏性命令。 +- 调用会改变项目状态的工具。 + +## 必须输出 + +```markdown +## Scout Report + +- 目标: +- 已检查路径: +- 发现的关键文件: +- 当前状态: +- 疑似问题: +- 交给 Analyst 的信息: +``` + +## 原则 + +- 不猜测文件位置,先看再说。 +- 发现凭据、token、cookie、密钥时只报告“存在敏感配置”,不能打印内容。 +- 如果需要写入或执行,交还 Main,由后续 agent 处理。 + diff --git a/multi_agent/agents/verifier.md b/multi_agent/agents/verifier.md new file mode 100644 index 0000000..c628067 --- /dev/null +++ b/multi_agent/agents/verifier.md @@ -0,0 +1,30 @@ +# Verifier + +Verifier 负责验证执行结果,不负责执行修复。 + +## 职责 + +- 检查目标文件。 +- 检查文件内容。 +- 运行测试或检查命令。 +- 检查日志。 +- 输出 `pass` / `fail` / `partial`。 + +## 输出格式 + +```markdown +## Verifier Result + +- status: pass | fail | partial +- checks: +- evidence: +- next_steps: +``` + +## 规则 + +- 没有证据不能输出 `pass`。 +- 检查失败必须说明失败项。 +- 部分通过必须输出 `partial`。 +- 不记录密钥、token、cookie 或凭据内容。 + diff --git a/multi_agent/config.yaml b/multi_agent/config.yaml new file mode 100644 index 0000000..3478628 --- /dev/null +++ b/multi_agent/config.yaml @@ -0,0 +1,44 @@ +project: + name: openclaw-multi-agent-os + root: "." 
+ workspace: "multi_agent/workspace" + openclaw_config: ".openclaw" + +tools: + openclaw: + command: "openclaw" + required: false + hermes: + command: "hermes" + required: false + claude_code: + command: "claude" + required: false + +policy: + allow_file_write: true + allow_delete: false + allow_system_path_modify: false + require_guard_before_execute: true + require_verifier_after_execute: true + require_memory_after_task: true + +workflows: + info: "multi_agent/workflows/info.yaml" + analysis: "multi_agent/workflows/analysis.yaml" + modify: "multi_agent/workflows/modify.yaml" + debug: "multi_agent/workflows/debug.yaml" + architecture: "multi_agent/workflows/architecture.yaml" + risky: "multi_agent/workflows/risky.yaml" + +memory: + daily: "multi_agent/memory/daily" + project: "multi_agent/memory/project" + decisions: "multi_agent/memory/decisions" + failures: "multi_agent/memory/failures" + lessons: "multi_agent/memory/lessons" + +logs: + runs: "multi_agent/logs/runs" + messages: "multi_agent/logs/messages" + diff --git a/multi_agent/memory/templates/daily.md b/multi_agent/memory/templates/daily.md new file mode 100644 index 0000000..b90d1bb --- /dev/null +++ b/multi_agent/memory/templates/daily.md @@ -0,0 +1,12 @@ +# Daily Memory Template + +## HH:MM:SS task_id + +- task_type: +- summary: +- agents_used: +- files_changed: +- commands_run: +- verifier_result: +- next_steps: + diff --git a/multi_agent/memory/templates/decision.md b/multi_agent/memory/templates/decision.md new file mode 100644 index 0000000..3433b13 --- /dev/null +++ b/multi_agent/memory/templates/decision.md @@ -0,0 +1,10 @@ +# Decision Memory Template + +- task_id: +- date: +- context: +- decision: +- alternatives: +- risks: +- verification: + diff --git a/multi_agent/memory/templates/failure.md b/multi_agent/memory/templates/failure.md new file mode 100644 index 0000000..e85c4a7 --- /dev/null +++ b/multi_agent/memory/templates/failure.md @@ -0,0 +1,10 @@ +# Failure Memory Template + +- task_id: 
+- date: +- task_type: +- failure: +- likely_cause: +- attempted_fix: +- next_steps: + diff --git a/multi_agent/memory/templates/lesson.md b/multi_agent/memory/templates/lesson.md new file mode 100644 index 0000000..304beba --- /dev/null +++ b/multi_agent/memory/templates/lesson.md @@ -0,0 +1,8 @@ +# Lesson Memory Template + +- date: +- source_task: +- lesson: +- applies_to: +- avoid: + diff --git a/multi_agent/scripts/call_claude_code.sh b/multi_agent/scripts/call_claude_code.sh new file mode 100755 index 0000000..dfdff6a --- /dev/null +++ b/multi_agent/scripts/call_claude_code.sh @@ -0,0 +1,179 @@ +#!/usr/bin/env bash +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MA_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +LOG_DIR="$MA_DIR/logs/runs" +MSG_DIR="$MA_DIR/logs/messages" +mkdir -p "$LOG_DIR" "$MSG_DIR" + +PLAN_FILE="" +HERMES_FILE="" +GUARD_FILE="" +OUTPUT_FILE="" +GOAL="" +TASK_ID="claude_$(date +%Y%m%d_%H%M%S)" +DRY_RUN=0 +CLAUDE_TIMEOUT_SECONDS="${CLAUDE_TIMEOUT_SECONDS:-180}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --plan) + PLAN_FILE="${2:-}" + shift 2 + ;; + --hermes) + HERMES_FILE="${2:-}" + shift 2 + ;; + --guard) + GUARD_FILE="${2:-}" + shift 2 + ;; + --output) + OUTPUT_FILE="${2:-}" + shift 2 + ;; + --goal) + GOAL="${2:-}" + shift 2 + ;; + --task-id) + TASK_ID="${2:-}" + shift 2 + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + *) + printf 'unknown argument: %s\n' "$1" >&2 + exit 64 + ;; + esac +done + +if [[ -z "$PLAN_FILE" || -z "$GUARD_FILE" || -z "$OUTPUT_FILE" ]]; then + printf 'usage: call_claude_code.sh --plan PLAN --guard GUARD --output OUT [--hermes REVIEW] [--goal GOAL]\n' >&2 + exit 64 +fi + +RUN_LOG="$LOG_DIR/${TASK_ID}_claude_code.log" +PROMPT_FILE="$MSG_DIR/${TASK_ID}_claude_code_prompt.md" +HELP_FILE="$MSG_DIR/${TASK_ID}_claude_code_help.txt" + +guard_section() { + local start="$1" + local stop="$2" + awk -v start="$start" -v stop="$stop" ' + $0 ~ start {capture=1; next} + stop != "" && $0 ~ stop {capture=0} + capture 
{print} + ' "$GUARD_FILE" +} + +{ + printf '# Claude Code Execution Request\n\n' + printf 'You are the bounded execution backend for OpenClaw Multi-Agent OS.\n\n' + printf '## User Goal\n\n%s\n\n' "$GOAL" + printf '## Analyst Plan\n\n' + cat "$PLAN_FILE" + printf '\n\n## Hermes Review\n\n' + if [[ -n "$HERMES_FILE" && -f "$HERMES_FILE" ]]; then + cat "$HERMES_FILE" + else + printf 'No Hermes review was provided.\n' + fi + printf '\n\n## Guard Decision And Allowed Scope\n\n' + cat "$GUARD_FILE" + printf '\n\n## Guard Allowed Scope Only\n\n' + guard_section '^allowed_scope:' '^forbidden_actions:' + printf '\n\n## Guard Forbidden Actions Only\n\n' + guard_section '^forbidden_actions:' '' + printf '\n\n## Verifier Validation Method\n\n' + awk '/^verify:/{capture=1; next} capture {print}' "$PLAN_FILE" + printf '\n\n## Hard Boundaries\n\n' + printf -- '- Only execute the Guard allowed scope.\n' + printf -- '- Do not delete files.\n' + printf -- '- Do not use sudo, chmod -R, chown -R, or system path edits.\n' + printf -- '- Do not print, copy, or persist credentials.\n' + printf -- '- Keep changes minimal and leave verification to Verifier.\n' + printf '\n## Required Executor Output\n\n' + printf -- '- 执行目标:\n- 执行步骤:\n- 修改文件:\n- 运行命令:\n- 成功项:\n- 失败项:\n- 需要 Verifier 检查的内容:\n' +} > "$PROMPT_FILE" + +printf 'Claude prompt: %s\n' "$PROMPT_FILE" >> "$RUN_LOG" + +if [[ "$DRY_RUN" -eq 1 ]]; then + cat > "$OUTPUT_FILE" <> "$RUN_LOG" + exit 0 +fi + +if ! command -v claude >/dev/null 2>&1; then + cat > "$OUTPUT_FILE" <> "$RUN_LOG" + exit 20 +fi + +timeout 20 claude --help > "$HELP_FILE" 2>> "$RUN_LOG" || true +if ! 
grep -Eq -- '-p, --print|--print' "$HELP_FILE"; then + cat > "$OUTPUT_FILE" <> "$RUN_LOG" + exit 20 +fi + +if timeout "$CLAUDE_TIMEOUT_SECONDS" claude --print \ + --no-session-persistence \ + --permission-mode acceptEdits \ + --tools "Read,Write,Edit" \ + < "$PROMPT_FILE" > "$OUTPUT_FILE" 2>> "$RUN_LOG"; then + if [[ -s "$OUTPUT_FILE" ]]; then + printf 'claude execution complete via --print\n' >> "$RUN_LOG" + exit 0 + fi +fi +CLAUDE_STATUS=$? + +cat > "$OUTPUT_FILE" <&2 + exit 64 + ;; + esac +done + +if [[ -z "$INPUT_FILE" || -z "$OUTPUT_FILE" ]]; then + printf 'usage: call_hermes.sh --input PLAN --output REVIEW [--goal GOAL]\n' >&2 + exit 64 +fi + +RUN_LOG="$LOG_DIR/${TASK_ID}_hermes.log" +PROMPT_FILE="$MSG_DIR/${TASK_ID}_hermes_prompt.md" +TMP_OUTPUT="$MSG_DIR/${TASK_ID}_hermes_raw.md" +HELP_FILE="$MSG_DIR/${TASK_ID}_hermes_help.txt" + +{ + printf '# Hermes Review Request\n\n' + printf '## User Goal\n\n%s\n\n' "$GOAL" + printf '## Analyst Plan\n\n' + cat "$INPUT_FILE" + printf '\n\n## Required Output\n\n' + printf -- '- 总体评价:\n- 方案优点:\n- 潜在问题:\n- 遗漏风险:\n- 优化建议:\n- 最终建议: approve | revise | reject\n' +} > "$PROMPT_FILE" + +printf 'Hermes prompt: %s\n' "$PROMPT_FILE" >> "$RUN_LOG" + +write_fallback() { + local reason="$1" + cat > "$OUTPUT_FILE" </dev/null 2>&1; then + printf 'hermes: found at %s\n' "$(command -v hermes)" >> "$RUN_LOG" + timeout 20 hermes chat --help > "$HELP_FILE" 2>> "$RUN_LOG" || true + if ! grep -Eq -- '-q QUERY|--query QUERY' "$HELP_FILE"; then + printf 'hermes chat help did not expose -q/--query\n' >> "$RUN_LOG" + write_fallback "hermes chat does not expose non-interactive query mode" + printf 'Hermes review written: %s\n' "$OUTPUT_FILE" >> "$RUN_LOG" + printf '%s\n' "$OUTPUT_FILE" + exit 0 + fi + PROMPT_TEXT="$(cat "$PROMPT_FILE")" + timeout "$HERMES_TIMEOUT_SECONDS" hermes chat -Q --max-turns 1 -q "$PROMPT_TEXT" > "$TMP_OUTPUT" 2>> "$RUN_LOG" + STATUS=$? 
+ if [[ "$STATUS" -eq 0 && -s "$TMP_OUTPUT" ]]; then + if grep -Eqi '最终建议:[[:space:]]*(approve|revise|reject)|decision:[[:space:]]*(approve|revise|reject)|final recommendation:[[:space:]]*(approve|revise|reject)' "$TMP_OUTPUT"; then + cp "$TMP_OUTPUT" "$OUTPUT_FILE" + if ! grep -Eqi '最终建议|final recommendation' "$OUTPUT_FILE"; then + cat >> "$OUTPUT_FILE" <<'EOF' + +## Hermes Adapter Note + +- 总体评价: Hermes returned partial review output. +- 方案优点: See Hermes output above. +- 潜在问题: Final recommendation was missing. +- 遗漏风险: Review may be incomplete. +- 优化建议: Analyst should revise the plan before execution. +- 最终建议: revise +decision: revise +EOF + fi + else + printf 'hermes output did not match review format, writing fallback review\n' >> "$RUN_LOG" + write_fallback "hermes returned output without explicit approve/revise/reject" + fi + else + printf 'hermes call failed or produced empty output, status=%s\n' "$STATUS" >> "$RUN_LOG" + write_fallback "hermes non-interactive call failed or timed out with status $STATUS" + fi +else + printf 'hermes: missing, writing fallback review\n' >> "$RUN_LOG" + write_fallback "hermes command not found" +fi + +printf 'Hermes review written: %s\n' "$OUTPUT_FILE" >> "$RUN_LOG" +printf '%s\n' "$OUTPUT_FILE" diff --git a/multi_agent/scripts/call_openclaw.sh b/multi_agent/scripts/call_openclaw.sh new file mode 100755 index 0000000..6c87d44 --- /dev/null +++ b/multi_agent/scripts/call_openclaw.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MA_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +LOG_DIR="$MA_DIR/logs/runs" +MSG_DIR="$MA_DIR/logs/messages" +mkdir -p "$LOG_DIR" "$MSG_DIR" + +AGENT="" +GOAL="" +CONTEXT_FILE="" +OUTPUT_FILE="$MSG_DIR/openclaw_adapter_$(date +%Y%m%d_%H%M%S).md" +TASK_ID="openclaw_$(date +%Y%m%d_%H%M%S)" +OPENCLAW_TIMEOUT_SECONDS="${OPENCLAW_TIMEOUT_SECONDS:-90}" + +usage() { + cat <<'EOF' +usage: call_openclaw.sh --agent scout|analyst|guard|executor|verifier|coordinator --goal GOAL [--context-file PATH] --output-file PATH +EOF +} + +redact_stream() { + sed -E 's/((api[_-]?key|token|secret|password|passwd|cookie|credential|private[_-]?key)[[:space:]]*[:=][[:space:]]*)[^[:space:]]+/\1/Ig' +} + +write_fallback() { + local reason="$1" + { + printf '## OpenClaw Adapter Result\n\n' + printf -- '- mode: fallback\n' + printf -- '- agent: %s\n' "${AGENT:-none}" + printf -- '- goal: %s\n' "$GOAL" + printf -- '- reason: %s\n' "$reason" + printf -- '- output_source: local adapter fallback\n' + printf -- '- next_step: inspect %s and OpenClaw gateway/provider status\n' "$RUN_LOG" + } > "$OUTPUT_FILE" +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --agent) + AGENT="${2:-}" + shift 2 + ;; + --goal) + GOAL="${2:-}" + shift 2 + ;; + --context-file) + CONTEXT_FILE="${2:-}" + shift 2 + ;; + --output-file|--output) + OUTPUT_FILE="${2:-}" + shift 2 + ;; + --task-id) + TASK_ID="${2:-}" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + printf 'unknown argument: %s\n' "$1" >&2 + usage >&2 + exit 64 + ;; + esac +done + +if [[ -z "$AGENT" || -z "$GOAL" || -z "$OUTPUT_FILE" ]]; then + usage >&2 + exit 64 +fi + +case "$AGENT" in + scout|analyst|guard|executor|verifier|coordinator) + ;; + *) + printf 'unsupported OpenClaw agent: %s\n' "$AGENT" >&2 + exit 64 + ;; +esac + +RUN_LOG="$LOG_DIR/${TASK_ID}_${AGENT}_openclaw.log" +HELP_FILE="$MSG_DIR/${TASK_ID}_${AGENT}_openclaw_help.txt" +AGENTS_FILE="$MSG_DIR/${TASK_ID}_${AGENT}_openclaw_agents.json" +PROMPT_FILE="$MSG_DIR/${TASK_ID}_${AGENT}_openclaw_prompt.md" 
+RAW_OUTPUT="$MSG_DIR/${TASK_ID}_${AGENT}_openclaw_raw.json" + +if ! command -v openclaw >/dev/null 2>&1; then + printf 'openclaw: missing\n' >> "$RUN_LOG" + write_fallback "openclaw command not found" + printf '%s\n' "$OUTPUT_FILE" + exit 0 +fi + +timeout 20 openclaw agent --help > "$HELP_FILE" 2>> "$RUN_LOG" || true +if ! grep -q -- '--agent ' "$HELP_FILE"; then + printf 'openclaw agent help did not expose --agent \n' >> "$RUN_LOG" + write_fallback "openclaw agent command does not expose --agent " + printf '%s\n' "$OUTPUT_FILE" + exit 0 +fi + +timeout 20 openclaw agents list --json > "$AGENTS_FILE" 2>> "$RUN_LOG" +LIST_STATUS=$? +if [[ "$LIST_STATUS" -ne 0 || ! -s "$AGENTS_FILE" ]]; then + printf 'openclaw agents list failed, status=%s\n' "$LIST_STATUS" >> "$RUN_LOG" + write_fallback "unable to list OpenClaw agents" + printf '%s\n' "$OUTPUT_FILE" + exit 0 +fi + +if ! python3 - "$AGENTS_FILE" "$AGENT" <<'PY' >> "$RUN_LOG" 2>&1 +import json +import sys + +path, expected = sys.argv[1], sys.argv[2] +agents = json.load(open(path, encoding="utf-8")) +ids = {item.get("id") for item in agents} +if expected not in ids: + print(f"agent_not_configured: {expected}") + sys.exit(1) +print(f"agent_configured: {expected}") +PY +then + write_fallback "OpenClaw agent '$AGENT' is not configured in agents list" + printf '%s\n' "$OUTPUT_FILE" + exit 0 +fi + +{ + printf '# OpenClaw Agent Request\n\n' + printf 'Agent: %s\n\n' "$AGENT" + printf 'Goal: %s\n\n' "$GOAL" + printf 'Safety:\n' + printf -- '- Stay within the provided context and role.\n' + printf -- '- Do not delete files.\n' + printf -- '- Do not use sudo, chmod -R, chown -R, or system path edits.\n' + printf -- '- Do not print or persist credentials.\n\n' + printf 'Context:\n' + if [[ -n "$CONTEXT_FILE" ]]; then + if [[ -f "$CONTEXT_FILE" ]]; then + head -c 20000 "$CONTEXT_FILE" | redact_stream + printf '\n' + else + printf 'context file missing: %s\n' "$CONTEXT_FILE" + fi + else + printf 'No context file provided.\n' + fi 
+} > "$PROMPT_FILE" + +printf 'openclaw: %s\n' "$(command -v openclaw)" >> "$RUN_LOG" +printf 'prompt_file: %s\n' "$PROMPT_FILE" >> "$RUN_LOG" + +PROMPT_TEXT="$(cat "$PROMPT_FILE")" +timeout "$OPENCLAW_TIMEOUT_SECONDS" openclaw agent \ + --agent "$AGENT" \ + --message "$PROMPT_TEXT" \ + --json \ + --timeout "$OPENCLAW_TIMEOUT_SECONDS" > "$RAW_OUTPUT" 2>> "$RUN_LOG" +CALL_STATUS=$? + +if [[ "$CALL_STATUS" -eq 0 && -s "$RAW_OUTPUT" ]]; then + { + printf '## OpenClaw Adapter Result\n\n' + printf -- '- mode: real_openclaw_agent\n' + printf -- '- agent: %s\n' "$AGENT" + printf -- '- goal: %s\n' "$GOAL" + printf -- '- raw_output: %s\n\n' "$RAW_OUTPUT" + printf '```json\n' + head -c 20000 "$RAW_OUTPUT" | redact_stream + printf '\n```\n' + } > "$OUTPUT_FILE" + printf 'openclaw agent call succeeded\n' >> "$RUN_LOG" + printf '%s\n' "$OUTPUT_FILE" + exit 0 +fi + +printf 'openclaw agent call failed, status=%s\n' "$CALL_STATUS" >> "$RUN_LOG" +write_fallback "openclaw agent call failed; see run log for gateway/provider details" +printf '%s\n' "$OUTPUT_FILE" +exit 0 diff --git a/multi_agent/scripts/detect_tools.sh b/multi_agent/scripts/detect_tools.sh new file mode 100755 index 0000000..87e3adc --- /dev/null +++ b/multi_agent/scripts/detect_tools.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MA_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +LOG_DIR="$MA_DIR/logs/runs" +mkdir -p "$LOG_DIR" + +REPORT="$LOG_DIR/tools_report_$(date +%Y%m%d_%H%M%S).log" + +tool_version() { + local cmd="$1" + if command -v timeout >/dev/null 2>&1; then + timeout 10 "$cmd" --version 2>&1 | head -n 1 + else + "$cmd" --version 2>&1 | head -n 1 + fi +} + +check_tool() { + local name="$1" + local cmd="$2" + if command -v "$cmd" >/dev/null 2>&1; then + local path + local version + path="$(command -v "$cmd")" + version="$(tool_version "$cmd" || true)" + printf '%s: found\n' "$name" + printf ' command: %s\n' "$cmd" + printf ' path: %s\n' "$path" + printf ' version: %s\n' "${version:-unknown}" + else + printf '%s: missing\n' "$name" + printf ' command: %s\n' "$cmd" + printf ' version: unavailable\n' + fi +} + +{ + printf 'tools_report\n' + printf 'generated_at: %s\n' "$(date -Iseconds)" + printf 'multi_agent_root: %s\n\n' "$MA_DIR" + check_tool "openclaw" "openclaw" + printf '\n' + check_tool "hermes" "hermes" + printf '\n' + check_tool "claude" "claude" + printf '\n' + check_tool "python3" "python3" + printf '\n' + check_tool "git" "git" + printf '\n' + check_tool "rg" "rg" + printf '\nreport_file: %s\n' "$REPORT" +} | tee "$REPORT" + diff --git a/multi_agent/scripts/guard_check.py b/multi_agent/scripts/guard_check.py new file mode 100755 index 0000000..623ba12 --- /dev/null +++ b/multi_agent/scripts/guard_check.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +"""Guard check for OpenClaw Multi-Agent OS execution plans.""" + +from __future__ import annotations + +import argparse +import pathlib +import re +import sys + + +DENY_PATTERNS = [ + (re.compile(r"\brm\s+-[^\n]*r[^\n]*f|\brm\s+-[^\n]*f[^\n]*r", re.I), "recursive force delete is forbidden"), + (re.compile(r"\bsudo\b", re.I), "sudo is forbidden"), + (re.compile(r"\bchmod\s+-R\b", re.I), "recursive chmod is forbidden"), + (re.compile(r"\bchown\s+-R\b", re.I), "recursive chown is forbidden"), + (re.compile(r"\bdd\s+if=", re.I), "raw disk writes are forbidden"), + 
(re.compile(r"\bmkfs(\.|\s|$)", re.I), "filesystem formatting is forbidden"), + (re.compile(r"\bshred\b", re.I), "secure deletion is forbidden"), +] + +ASK_PATTERNS = [ + (re.compile(r"\bdelete\b|\bremove\b|\boverwrite\b|删除|覆盖|清空", re.I), "destructive intent needs user confirmation"), + (re.compile(r"\b(systemctl|service)\b", re.I), "service management needs user confirmation"), + (re.compile(r"\b(apt|apt-get|yum|dnf|pacman|brew|npm|pip3?|cargo)\s+install\b", re.I), "install commands need user confirmation"), +] + +SYSTEM_PATH_RE = re.compile(r"(^|[\s'\"=:])/(usr|bin|etc|var|boot|dev|proc|sys|root)(/|[\s'\"=:]|$)") +SECRET_RE = re.compile( + r"(api[_-]?key|token|secret|password|passwd|cookie|credential|private[_-]?key)", + re.I, +) + + +def read_text(path: str | None) -> str: + if not path: + return "" + p = pathlib.Path(path) + if not p.exists(): + return "" + return p.read_text(encoding="utf-8", errors="replace") + + +def hermes_decision(text: str) -> str: + if not text: + return "" + lines = [line.strip().lower() for line in text.splitlines()] + final_lines = [line for line in lines if "final" in line or "最终建议" in line] + haystack = "\n".join(final_lines or lines[-10:]) + if "reject" in haystack: + return "reject" + if "revise" in haystack: + return "revise" + if "approve" in haystack: + return "approve" + return "" + + +def has_clear_scope(plan: str) -> bool: + required_markers = ("allowed_files:", "需要修改的文件", "执行步骤", "commands:", "验证方法") + return any(marker in plan for marker in required_markers) + + +def emit(decision: str, risk_level: str, reason: str, allowed_scope: str, forbidden_actions: str) -> int: + print("[Guard Decision]") + print(f"decision: {decision}") + print(f"risk_level: {risk_level}") + print(f"reason: {reason}") + print("allowed_scope:") + print(allowed_scope.strip() or "- none") + print("forbidden_actions:") + print(forbidden_actions.strip() or "- destructive deletes") + print("- recursive permission or ownership changes") + print("- system
path modification") + print("- secret exposure or credential handling") + return {"allow": 0, "ask_user": 2, "deny": 3}[decision] + + +def main() -> int: + parser = argparse.ArgumentParser(description="Check an execution plan before Executor runs.") + parser.add_argument("--plan-file", required=True) + parser.add_argument("--task-type", default="modify") + parser.add_argument("--hermes-file") + parser.add_argument("--analyst-revised", action="store_true") + args = parser.parse_args() + + plan = read_text(args.plan_file) + hermes = read_text(args.hermes_file) + task_type = args.task_type.strip().lower() + + if not plan.strip(): + return emit("ask_user", "medium", "execution plan is empty", "- none", "- all execution until scope is clear") + + h_decision = hermes_decision(hermes) + if h_decision == "reject": + return emit("deny", "high", "Hermes rejected the plan", "- none", "- all execution") + if h_decision == "revise" and not args.analyst_revised: + return emit("deny", "medium", "Hermes requested revision but Analyst revision is missing", "- none", "- all execution") + + if task_type == "risky": + return emit("ask_user", "high", "task_type is risky and requires explicit user confirmation", "- none", "- all execution") + + for pattern, reason in DENY_PATTERNS: + if pattern.search(plan): + return emit("deny", "high", reason, "- none", "- forbidden command in plan") + + if SYSTEM_PATH_RE.search(plan): + return emit("deny", "high", "plan references protected system paths", "- none", "- system path modification") + + if SECRET_RE.search(plan): + return emit("ask_user", "high", "plan references credential or secret-related keywords", "- none", "- secret exposure or credential handling") + + for pattern, reason in ASK_PATTERNS: + if pattern.search(plan): + return emit("ask_user", "medium", reason, "- pending user confirmation", "- destructive or environment-changing actions") + + if task_type in {"modify", "debug", "architecture"} and not has_clear_scope(plan): + return 
emit("ask_user", "medium", "execution scope is not clear enough", "- none", "- all execution until scope is clear") + + risk = "medium" if task_type in {"debug", "architecture"} else "low" + allowed = [] + for line in plan.splitlines(): + stripped = line.strip() + if stripped.startswith("- ") and ( + "multi_agent/" in stripped + or "workspace/" in stripped + or stripped.startswith("- no file changes") + ): + allowed.append(stripped) + if not allowed: + allowed.append("- scope defined in Analyst Plan") + + return emit( + "allow", + risk, + "plan has clear scope and no forbidden operations were detected", + "\n".join(allowed), + "- destructive deletes\n- recursive permission changes\n- system path modification\n- secret exposure", + ) + + +if __name__ == "__main__": + sys.exit(main()) + diff --git a/multi_agent/scripts/run_workflow.sh b/multi_agent/scripts/run_workflow.sh new file mode 100755 index 0000000..bad027a --- /dev/null +++ b/multi_agent/scripts/run_workflow.sh @@ -0,0 +1,668 @@ +#!/usr/bin/env bash +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MA_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +ROOT_DIR="$(cd "$MA_DIR/.." 
&& pwd)" +cd "$ROOT_DIR" + +TASK_TYPE="" +GOAL="" +BACKEND="shell" +DRY_RUN=0 + +usage() { + cat <<'EOF' +usage: run_workflow.sh --type TYPE --goal GOAL [--backend shell|python|claude_code] [--dry-run] + +TYPE: info | analysis | modify | debug | architecture | risky +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --type) + TASK_TYPE="${2:-}" + shift 2 + ;; + --goal) + GOAL="${2:-}" + shift 2 + ;; + --backend) + BACKEND="${2:-}" + shift 2 + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + printf 'unknown argument: %s\n' "$1" >&2 + usage >&2 + exit 64 + ;; + esac +done + +if [[ -z "$TASK_TYPE" || -z "$GOAL" ]]; then + usage >&2 + exit 64 +fi + +case "$TASK_TYPE" in + info|analysis|modify|debug|architecture|risky) + ;; + *) + printf 'unsupported task type: %s\n' "$TASK_TYPE" >&2 + exit 64 + ;; +esac + +case "$BACKEND" in + shell|python|claude_code) + ;; + *) + printf 'unsupported backend: %s\n' "$BACKEND" >&2 + exit 64 + ;; +esac + +mkdir -p "$MA_DIR/logs/runs" "$MA_DIR/logs/messages" "$MA_DIR/workspace" \ + "$MA_DIR/memory/daily" "$MA_DIR/memory/project" "$MA_DIR/memory/decisions" \ + "$MA_DIR/memory/failures" "$MA_DIR/memory/lessons" + +TASK_ID="${TASK_TYPE}_$(date +%Y%m%d_%H%M%S)_$$" +RUN_LOG="$MA_DIR/logs/runs/${TASK_ID}.log" +ROUTER_FILE="$MA_DIR/logs/messages/${TASK_ID}_router.md" +SCOUT_FILE="$MA_DIR/logs/messages/${TASK_ID}_scout.md" +ANALYST_FILE="$MA_DIR/logs/messages/${TASK_ID}_analyst.md" +HERMES_FILE="$MA_DIR/logs/messages/${TASK_ID}_hermes.md" +GUARD_FILE="$MA_DIR/logs/messages/${TASK_ID}_guard.md" +EXECUTOR_FILE="$MA_DIR/logs/messages/${TASK_ID}_executor.md" +VERIFIER_FILE="$MA_DIR/logs/messages/${TASK_ID}_verifier.md" + +AGENTS_USED="main,router" +FILES_CHANGED="" +COMMANDS_RUN="" +VERIFIER_RESULT="not_run" +SUMMARY="workflow started" +NEXT_STEPS="none" + +log() { + printf '%s\n' "$*" | tee -a "$RUN_LOG" +} + +goal_is_smoke() { + printf '%s' "$GOAL" | grep -Eqi 'workspace/test[.]txt' && + printf '%s' "$GOAL" | 
grep -Eqi 'hello multi-agent' +} + +goal_is_claude_backend_test() { + printf '%s' "$GOAL" | grep -Eqi 'workspace/claude_backend_test[.]txt' && + printf '%s' "$GOAL" | grep -Eqi 'claude backend test' +} + +SAFE_CREATE_MATCH=0 +SAFE_CREATE_VALID=0 +SAFE_CREATE_FILE="" +SAFE_CREATE_CONTENT="" +SAFE_CREATE_REL="" +SAFE_CREATE_TARGET="" +SAFE_CREATE_ERROR="" + +parse_workspace_create_goal() { + local goal="$1" + SAFE_CREATE_MATCH=0 + SAFE_CREATE_VALID=0 + SAFE_CREATE_FILE="" + SAFE_CREATE_CONTENT="" + SAFE_CREATE_REL="" + SAFE_CREATE_TARGET="" + SAFE_CREATE_ERROR="" + + if [[ "$goal" =~ ^create[[:space:]]+workspace/([^[:space:]]+)[[:space:]]+with[[:space:]]+(.+)$ ]]; then + SAFE_CREATE_MATCH=1 + SAFE_CREATE_FILE="${BASH_REMATCH[1]}" + SAFE_CREATE_CONTENT="${BASH_REMATCH[2]}" + + if [[ -z "$SAFE_CREATE_FILE" ]]; then + SAFE_CREATE_ERROR="filename is empty" + elif [[ "$SAFE_CREATE_FILE" == /* ]]; then + SAFE_CREATE_ERROR="filename must not start with /" + elif [[ "$SAFE_CREATE_FILE" == *..* ]]; then + SAFE_CREATE_ERROR="filename must not contain .." + elif [[ "$SAFE_CREATE_FILE" == *~* ]]; then + SAFE_CREATE_ERROR="filename must not contain ~" + elif [[ "$SAFE_CREATE_FILE" == */* ]]; then + SAFE_CREATE_ERROR="filename must not contain path separators" + else + SAFE_CREATE_VALID=1 + SAFE_CREATE_REL="multi_agent/workspace/$SAFE_CREATE_FILE" + SAFE_CREATE_TARGET="$MA_DIR/workspace/$SAFE_CREATE_FILE" + fi + fi +} + +write_memory() { + local memory_agents="$AGENTS_USED,memory_manager" + python3 "$SCRIPT_DIR/write_memory.py" \ + --root "$MA_DIR" \ + --task-id "$TASK_ID" \ + --task-type "$TASK_TYPE" \ + --summary "$SUMMARY" \ + --agents-used "$memory_agents" \ + --files-changed "$FILES_CHANGED" \ + --commands-run "$COMMANDS_RUN" \ + --verifier-result "$VERIFIER_RESULT" \ + --next-steps "$NEXT_STEPS" >> "$RUN_LOG" 2>&1 +} + +parse_guard_decision() { + awk -F': *' '/^decision:/ {print $2; exit}' "$GUARD_FILE" +} + +parse_hermes_decision() { + if [[ ! 
-f "$HERMES_FILE" ]]; then + printf '' + return + fi + grep -Ei '最终建议|final recommendation|approve|revise|reject' "$HERMES_FILE" | + tail -n 1 | + tr '[:upper:]' '[:lower:]' +} + +log "task_id: $TASK_ID" +log "task_type: $TASK_TYPE" +log "backend: $BACKEND" +log "dry_run: $DRY_RUN" +log "goal: $GOAL" +log "run_log: $RUN_LOG" + +parse_workspace_create_goal "$GOAL" + +COMPLEXITY="low" +RISK_LEVEL="low" +NEED_HERMES="no" +NEED_USER_CONFIRM="no" +WORKFLOW="multi_agent/workflows/${TASK_TYPE}.yaml" + +case "$TASK_TYPE" in + architecture) + COMPLEXITY="high" + RISK_LEVEL="medium" + NEED_HERMES="yes" + ;; + debug) + COMPLEXITY="medium" + RISK_LEVEL="medium" + ;; + risky) + COMPLEXITY="high" + RISK_LEVEL="high" + NEED_HERMES="yes" + NEED_USER_CONFIRM="yes" + ;; +esac + +if printf '%s' "$GOAL" | grep -Eqi 'system|permission|credential|token|cookie|delete|remove|sudo|/etc|/usr|/bin|/var'; then + RISK_LEVEL="high" + NEED_USER_CONFIRM="yes" +fi + +cat > "$ROUTER_FILE" < "$SCOUT_FILE" </dev/null || printf 'no') +- 疑似问题: + - none from read-only scout +- 交给 Analyst 的信息: + - Use .multi-agent as the bounded orchestration layer. + - Do not modify .openclaw unless a later plan explicitly scopes it. 
+EOF +AGENTS_USED="$AGENTS_USED,scout" +log "stage scout: $SCOUT_FILE" + +if [[ "$TASK_TYPE" == "info" ]]; then + SUMMARY="info workflow completed with read-only Scout report" + VERIFIER_RESULT="not_required" + NEXT_STEPS="review $SCOUT_FILE" + write_memory + log "workflow_result: pass" + log "memory: written" + printf 'workflow_result: pass\n' + printf 'scout_report: %s\n' "$SCOUT_FILE" + exit 0 +fi + +PLAN_FILES="- no file changes" +PLAN_COMMANDS="- no execution" +PLAN_VERIFY="- inspect Analyst output" +PLAN_RISK="$RISK_LEVEL" +HERMES_REQUIRED="no" + +if [[ "$SAFE_CREATE_VALID" -eq 1 ]]; then + PLAN_FILES="- $SAFE_CREATE_REL" + PLAN_COMMANDS="- create $SAFE_CREATE_REL with exact requested content" + PLAN_VERIFY="- file exists: $SAFE_CREATE_REL +- file contains: $SAFE_CREATE_CONTENT" +elif goal_is_smoke; then + PLAN_FILES="- multi_agent/workspace/test.txt" + PLAN_COMMANDS="- create multi_agent/workspace/test.txt with exact content: hello multi-agent" + PLAN_VERIFY="- file exists: multi_agent/workspace/test.txt +- file contains: hello multi-agent" +elif goal_is_claude_backend_test; then + PLAN_FILES="- multi_agent/workspace/claude_backend_test.txt" + PLAN_COMMANDS="- create multi_agent/workspace/claude_backend_test.txt with exact content: claude backend test" + PLAN_VERIFY="- file exists: multi_agent/workspace/claude_backend_test.txt +- file contains: claude backend test" +elif [[ "$TASK_TYPE" == "architecture" ]]; then + PLAN_FILES="- multi_agent/agents/*.md +- multi_agent/workflows/*.yaml +- multi_agent/scripts/*.sh +- multi_agent/scripts/*.py +- multi_agent/README.md" + PLAN_COMMANDS="- execute only after Hermes approve or Analyst revision +- keep changes within .multi-agent" + PLAN_VERIFY="- run smoke test +- inspect logs and memory output" + HERMES_REQUIRED="yes" +elif [[ "$TASK_TYPE" == "debug" || "$TASK_TYPE" == "modify" ]]; then + PLAN_FILES="- multi_agent/workspace/*" + PLAN_COMMANDS="- no generic shell execution; only supported safe actions may run" + 
PLAN_VERIFY="- run verifier against explicit changed files" +fi + +cat > "$ANALYST_FILE" < "$GUARD_FILE" 2>> "$RUN_LOG" + GUARD_STATUS=$? + AGENTS_USED="$AGENTS_USED,guard" + log "stage guard: $GUARD_FILE status=$GUARD_STATUS" + SUMMARY="analysis workflow completed; no execution performed" + VERIFIER_RESULT="not_required" + NEXT_STEPS="review $ANALYST_FILE" + write_memory + log "workflow_result: pass" + printf 'workflow_result: pass\n' + printf 'analyst_plan: %s\n' "$ANALYST_FILE" + printf 'guard_decision: %s\n' "$GUARD_FILE" + exit 0 +fi + +ANALYST_REVISED=0 +if [[ "$TASK_TYPE" == "architecture" || "$TASK_TYPE" == "risky" ]]; then + "$SCRIPT_DIR/call_hermes.sh" \ + --input "$ANALYST_FILE" \ + --output "$HERMES_FILE" \ + --goal "$GOAL" \ + --task-id "$TASK_ID" >> "$RUN_LOG" 2>&1 + HERMES_STATUS=$? + AGENTS_USED="$AGENTS_USED,hermes_reviewer" + log "stage hermes: $HERMES_FILE status=$HERMES_STATUS" + + HERMES_DECISION="$(parse_hermes_decision)" + if printf '%s' "$HERMES_DECISION" | grep -qi 'reject'; then + SUMMARY="workflow stopped because Hermes rejected the plan" + VERIFIER_RESULT="fail" + NEXT_STEPS="revise Analyst plan before execution" + write_memory + log "workflow_result: stopped_by_hermes_reject" + printf 'workflow_result: stopped_by_hermes_reject\n' + exit 3 + fi + if printf '%s' "$HERMES_DECISION" | grep -qi 'revise'; then + REVISED_FILE="$MA_DIR/logs/messages/${TASK_ID}_analyst_revised.md" + cp "$ANALYST_FILE" "$REVISED_FILE" + cat >> "$REVISED_FILE" <<'EOF' + +## Analyst Revision + +- revision_status: incorporated +- changes: + - tightened execution scope to .multi-agent only + - kept verification explicit + - kept deletion and system-path changes out of scope +- analyst_revision: true +EOF + ANALYST_FILE="$REVISED_FILE" + ANALYST_REVISED=1 + AGENTS_USED="$AGENTS_USED,analyst_revision" + log "stage analyst_revision: $ANALYST_FILE" + fi +fi + +if [[ "$ANALYST_REVISED" -eq 1 ]]; then + python3 "$SCRIPT_DIR/guard_check.py" \ + --task-type "$TASK_TYPE" \ + 
--plan-file "$ANALYST_FILE" \ + --hermes-file "$HERMES_FILE" \ + --analyst-revised > "$GUARD_FILE" 2>> "$RUN_LOG" +else + python3 "$SCRIPT_DIR/guard_check.py" \ + --task-type "$TASK_TYPE" \ + --plan-file "$ANALYST_FILE" \ + --hermes-file "$HERMES_FILE" > "$GUARD_FILE" 2>> "$RUN_LOG" +fi +GUARD_STATUS=$? +AGENTS_USED="$AGENTS_USED,guard" +log "stage guard: $GUARD_FILE status=$GUARD_STATUS" + +GUARD_DECISION="$(parse_guard_decision)" +if [[ "$GUARD_DECISION" != "allow" ]]; then + SUMMARY="workflow stopped by Guard with decision: ${GUARD_DECISION:-unknown}" + VERIFIER_RESULT="not_run" + NEXT_STEPS="review Guard decision and confirm scope before execution" + write_memory + log "workflow_result: stopped_by_guard" + printf 'workflow_result: stopped_by_guard\n' + printf 'guard_decision: %s\n' "$GUARD_FILE" + exit 4 +fi + +if [[ "$DRY_RUN" -eq 1 ]]; then + SUMMARY="dry-run completed after Guard allow; Executor skipped" + VERIFIER_RESULT="not_run" + NEXT_STEPS="rerun without --dry-run to execute" + write_memory + log "workflow_result: dry_run_pass" + printf 'workflow_result: dry_run_pass\n' + exit 0 +fi + +EXEC_STATUS=0 +case "$BACKEND" in + shell) + if [[ "$SAFE_CREATE_VALID" -eq 1 ]]; then + mkdir -p "$MA_DIR/workspace" + CONTENT_SUMMARY="${SAFE_CREATE_CONTENT//$'\n'/ }" + if [[ ${#CONTENT_SUMMARY} -gt 80 ]]; then + CONTENT_SUMMARY="${CONTENT_SUMMARY:0:80}..." + fi + if [[ -L "$SAFE_CREATE_TARGET" ]]; then + EXEC_STATUS=10 + WRITE_RESULT="refused to follow symlink at $SAFE_CREATE_REL" + WRITE_ACTION="$WRITE_RESULT" + FAILURE_RESULT="$WRITE_RESULT" + FILES_CHANGED="" + COMMANDS_RUN="none" + else + if printf '%s\n' "$SAFE_CREATE_CONTENT" > "$SAFE_CREATE_TARGET"; then + EXEC_STATUS=0 + WRITE_RESULT="file write completed" + WRITE_ACTION="wrote requested content into $SAFE_CREATE_REL" + FAILURE_RESULT="none" + FILES_CHANGED="$SAFE_CREATE_REL" + COMMANDS_RUN="safe shell create $SAFE_CREATE_REL" + else + EXEC_STATUS=$? 
+ WRITE_RESULT="file write failed with status $EXEC_STATUS" + WRITE_ACTION="$WRITE_RESULT" + FAILURE_RESULT="$WRITE_RESULT" + FILES_CHANGED="" + COMMANDS_RUN="safe shell create $SAFE_CREATE_REL" + fi + fi + cat > "$EXECUTOR_FILE" < with + - $WRITE_ACTION + - content summary: $CONTENT_SUMMARY + - content bytes: ${#SAFE_CREATE_CONTENT} +- 修改文件: + - ${FILES_CHANGED:-none} +- 运行命令: + - $COMMANDS_RUN +- 成功项: + - $WRITE_RESULT +- 失败项: + - $FAILURE_RESULT +- 需要 Verifier 检查的内容: + - file exists: $SAFE_CREATE_REL + - file contains requested content +EOF + elif [[ "$SAFE_CREATE_MATCH" -eq 1 ]]; then + EXEC_STATUS=10 + cat > "$EXECUTOR_FILE" < "$MA_DIR/workspace/test.txt" + FILES_CHANGED="multi_agent/workspace/test.txt" + COMMANDS_RUN="create multi_agent/workspace/test.txt" + cat > "$EXECUTOR_FILE" < "$EXECUTOR_FILE" < "$EXECUTOR_FILE" < "$EXECUTOR_FILE" <> "$RUN_LOG" 2>&1 + EXEC_STATUS=$? + COMMANDS_RUN="claude_code adapter" + ;; +esac + +AGENTS_USED="$AGENTS_USED,executor" +log "stage executor: $EXECUTOR_FILE status=$EXEC_STATUS" + +if [[ "$SAFE_CREATE_VALID" -eq 1 ]]; then + python3 "$SCRIPT_DIR/verify.py" \ + --goal "$GOAL" > "$VERIFIER_FILE" 2>> "$RUN_LOG" + VERIFY_STATUS=$? +elif goal_is_smoke; then + python3 "$SCRIPT_DIR/verify.py" \ + --file-exists "multi_agent/workspace/test.txt" \ + --file-contains "multi_agent/workspace/test.txt" "hello multi-agent" > "$VERIFIER_FILE" 2>> "$RUN_LOG" + VERIFY_STATUS=$? +elif goal_is_claude_backend_test; then + python3 "$SCRIPT_DIR/verify.py" \ + --file-exists "multi_agent/workspace/claude_backend_test.txt" \ + --file-contains "multi_agent/workspace/claude_backend_test.txt" "claude backend test" > "$VERIFIER_FILE" 2>> "$RUN_LOG" + VERIFY_STATUS=$? +else + python3 "$SCRIPT_DIR/verify.py" \ + --log-file "$EXECUTOR_FILE" > "$VERIFIER_FILE" 2>> "$RUN_LOG" + VERIFY_STATUS=$? 
+fi + +AGENTS_USED="$AGENTS_USED,verifier" +log "stage verifier: $VERIFIER_FILE status=$VERIFY_STATUS" + +if grep -Eq 'status: pass' "$VERIFIER_FILE"; then + VERIFIER_RESULT="pass" + SUMMARY="workflow completed and verifier passed" + NEXT_STEPS="none" + RESULT_EXIT=0 +elif grep -Eq 'status: partial' "$VERIFIER_FILE"; then + VERIFIER_RESULT="partial" + SUMMARY="workflow completed with partial verification" + NEXT_STEPS="inspect verifier output and add stronger checks" + RESULT_EXIT=2 +else + VERIFIER_RESULT="fail" + SUMMARY="workflow failed verification" + NEXT_STEPS="inspect executor and verifier logs" + RESULT_EXIT=1 +fi + +if [[ "$EXEC_STATUS" -ne 0 && "$VERIFIER_RESULT" == "pass" ]]; then + VERIFIER_RESULT="fail" + SUMMARY="executor failed even though verifier checks passed" + NEXT_STEPS="inspect executor status and rerun" + RESULT_EXIT=1 +fi + +write_memory +AGENTS_USED="$AGENTS_USED,memory_manager" +log "stage memory: written" +log "workflow_result: $VERIFIER_RESULT" + +printf 'workflow_result: %s\n' "$VERIFIER_RESULT" +printf 'task_id: %s\n' "$TASK_ID" +printf 'run_log: %s\n' "$RUN_LOG" +printf 'verifier: %s\n' "$VERIFIER_FILE" +exit "$RESULT_EXIT" diff --git a/multi_agent/scripts/smoke_test.sh b/multi_agent/scripts/smoke_test.sh new file mode 100755 index 0000000..21214f1 --- /dev/null +++ b/multi_agent/scripts/smoke_test.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +set -u + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +MA_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +ROOT_DIR="$(cd "$MA_DIR/.." && pwd)" +cd "$ROOT_DIR" + +RUN_OUTPUT="$MA_DIR/logs/runs/smoke_test_$(date +%Y%m%d_%H%M%S).log" +mkdir -p "$MA_DIR/logs/runs" "$MA_DIR/workspace" + +bash "$SCRIPT_DIR/run_workflow.sh" \ + --type modify \ + --goal "create workspace/test.txt with hello multi-agent" \ + --backend shell > "$RUN_OUTPUT" 2>&1 +STATUS=$? 
+ +FAILURES=0 + +if [[ "$STATUS" -ne 0 ]]; then + printf 'smoke_test: run_workflow failed with status %s\n' "$STATUS" + FAILURES=$((FAILURES + 1)) +fi + +if [[ ! -f "$MA_DIR/workspace/test.txt" ]]; then + printf 'smoke_test: missing multi_agent/workspace/test.txt\n' + FAILURES=$((FAILURES + 1)) +fi + +if [[ -f "$MA_DIR/workspace/test.txt" ]]; then + if ! grep -q 'hello multi-agent' "$MA_DIR/workspace/test.txt"; then + printf 'smoke_test: file content check failed\n' + FAILURES=$((FAILURES + 1)) + fi +fi + +if ! find "$MA_DIR/logs/runs" -type f -name '*.log' | grep -q .; then + printf 'smoke_test: no run logs found\n' + FAILURES=$((FAILURES + 1)) +fi + +TODAY="$(date +%F)" +if [[ ! -f "$MA_DIR/memory/daily/${TODAY}.md" ]]; then + printf 'smoke_test: missing daily memory for %s\n' "$TODAY" + FAILURES=$((FAILURES + 1)) +fi + +if ! grep -q 'workflow_result: pass' "$RUN_OUTPUT"; then + printf 'smoke_test: workflow output did not report pass\n' + FAILURES=$((FAILURES + 1)) +fi + +if [[ "$FAILURES" -eq 0 ]]; then + printf 'smoke_test: pass\n' + printf 'run_output: %s\n' "$RUN_OUTPUT" + exit 0 +fi + +python3 "$SCRIPT_DIR/write_memory.py" \ + --root "$MA_DIR" \ + --task-id "smoke_test_$(date +%Y%m%d_%H%M%S)" \ + --task-type "modify" \ + --summary "smoke test failed" \ + --agents-used "smoke_test,memory_manager" \ + --files-changed "multi_agent/workspace/test.txt" \ + --commands-run "bash multi_agent/scripts/run_workflow.sh --type modify --goal create workspace/test.txt with hello multi-agent --backend shell" \ + --verifier-result "fail" \ + --next-steps "inspect $RUN_OUTPUT" + +printf 'smoke_test: fail\n' +printf 'run_output: %s\n' "$RUN_OUTPUT" +exit 1 + diff --git a/multi_agent/scripts/verify.py b/multi_agent/scripts/verify.py new file mode 100755 index 0000000..3e8e12f --- /dev/null +++ b/multi_agent/scripts/verify.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +"""Verifier for OpenClaw Multi-Agent OS.""" + +from __future__ import annotations + +import argparse +import 
pathlib +import re +import subprocess +import sys + + +SECRET_VALUE_RE = re.compile( + r"(?i)(api[_-]?key|token|secret|password|passwd|cookie|credential|private[_-]?key)\s*[:=]\s*['\"]?[^'\"\s]+" +) +CREATE_WORKSPACE_RE = re.compile(r"^create\s+workspace/(\S+)\s+with\s+(.+)$") + + +def redact(text: str) -> str: + return SECRET_VALUE_RE.sub(r"\1=", text) + + +def add_result(results: list[tuple[str, str]], ok: bool, message: str) -> None: + results.append(("pass" if ok else "fail", message)) + + +def parse_create_workspace_goal(goal: str) -> tuple[pathlib.Path | None, str | None, str | None]: + match = CREATE_WORKSPACE_RE.match(goal.strip()) + if not match: + return None, None, None + + filename, expected = match.groups() + if not filename: + return None, None, "filename is empty" + if filename.startswith("/"): + return None, None, "filename must not start with /" + if ".." in filename: + return None, None, "filename must not contain .." + if "~" in filename: + return None, None, "filename must not contain ~" + if "/" in filename: + return None, None, "filename must not contain path separators" + + return pathlib.Path(".multi-agent") / "workspace" / filename, expected, None + + +def main() -> int: + parser = argparse.ArgumentParser(description="Verify files, content, commands, and logs.") + parser.add_argument("--file-exists", action="append", default=[]) + parser.add_argument("--file-contains", nargs=2, action="append", default=[], metavar=("PATH", "TEXT")) + parser.add_argument("--command", action="append", default=[]) + parser.add_argument("--goal") + parser.add_argument("--log-file") + args = parser.parse_args() + + results: list[tuple[str, str]] = [] + strict_goal_failed = False + + if args.goal: + path, expected, error = parse_create_workspace_goal(args.goal) + if error: + add_result(results, False, f"safe create goal rejected: {error}") + strict_goal_failed = True + elif path is not None and expected is not None: + raw_path = str(path) + exists = path.exists() 
+ add_result(results, exists, f"file exists: {raw_path}") + if exists: + text = path.read_text(encoding="utf-8", errors="replace") + contains = expected in text + add_result(results, contains, f"file contains '{expected}': {raw_path}") + strict_goal_failed = strict_goal_failed or not contains + else: + add_result(results, False, f"file contains '{expected}': {raw_path} missing") + strict_goal_failed = True + + for raw_path in args.file_exists: + path = pathlib.Path(raw_path) + add_result(results, path.exists(), f"file exists: {raw_path}") + + for raw_path, expected in args.file_contains: + path = pathlib.Path(raw_path) + if not path.exists(): + add_result(results, False, f"file contains '{expected}': {raw_path} missing") + continue + text = path.read_text(encoding="utf-8", errors="replace") + add_result(results, expected in text, f"file contains '{expected}': {raw_path}") + + for command in args.command: + completed = subprocess.run( + command, + shell=True, + executable="/bin/bash", + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + timeout=120, + ) + output = redact(completed.stdout.strip()) + ok = completed.returncode == 0 + detail = f"command exit 0: {command}" + if output: + detail += f" | output: {output[:500]}" + add_result(results, ok, detail) + + if args.log_file: + path = pathlib.Path(args.log_file) + add_result(results, path.exists() and path.stat().st_size > 0, f"log exists and non-empty: {args.log_file}") + + if not results: + status = "partial" + elif strict_goal_failed: + status = "fail" + elif all(result == "pass" for result, _ in results): + status = "pass" + elif any(result == "pass" for result, _ in results): + status = "partial" + else: + status = "fail" + + print("## Verifier Result") + print() + print(f"- status: {status}") + print("- checks:") + for result, message in results: + print(f" - {result}: {redact(message)}") + print("- evidence:") + print(" - verifier executed locally") + print("- next_steps:") + if status == 
"pass": + print(" - none") + else: + print(" - inspect failed checks and rerun workflow") + + return {"pass": 0, "partial": 2, "fail": 1}[status] + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/multi_agent/scripts/write_memory.py b/multi_agent/scripts/write_memory.py new file mode 100755 index 0000000..ef79025 --- /dev/null +++ b/multi_agent/scripts/write_memory.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +"""Write daily, project, decision, failure, and lesson memory.""" + +from __future__ import annotations + +import argparse +import datetime as dt +import pathlib +import re +import sys + + +SECRET_VALUE_RE = re.compile( + r"(?i)(api[_-]?key|token|secret|password|passwd|cookie|credential|private[_-]?key)\s*[:=]\s*['\"]?[^'\"\s]+" +) + + +def redact(text: str) -> str: + return SECRET_VALUE_RE.sub(r"\1=", text or "") + + +def safe_slug(value: str) -> str: + value = re.sub(r"[^A-Za-z0-9_.-]+", "-", value.strip()) + return value.strip("-")[:120] or "task" + + +def write_text(path: pathlib.Path, text: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +def append_text(path: pathlib.Path, text: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as handle: + handle.write(text) + + +def main() -> int: + parser = argparse.ArgumentParser(description="Persist multi-agent memory safely.") + parser.add_argument("--task-id", required=True) + parser.add_argument("--task-type", required=True) + parser.add_argument("--summary", required=True) + parser.add_argument("--agents-used", required=True) + parser.add_argument("--files-changed", default="") + parser.add_argument("--commands-run", default="") + parser.add_argument("--verifier-result", default="") + parser.add_argument("--next-steps", default="") + parser.add_argument("--lesson", default="") + parser.add_argument("--root", default=".multi-agent") + args = parser.parse_args() + + root = 
pathlib.Path(args.root) + now = dt.datetime.now() + day = now.strftime("%Y-%m-%d") + clock = now.strftime("%H:%M:%S") + + task_id = redact(args.task_id) + task_type = redact(args.task_type) + summary = redact(args.summary) + agents_used = redact(args.agents_used) + files_changed = redact(args.files_changed) + commands_run = redact(args.commands_run) + verifier_result = redact(args.verifier_result) + next_steps = redact(args.next_steps) + + daily_entry = f""" +## {clock} {task_id} + +- task_type: {task_type} +- summary: {summary} +- agents_used: {agents_used} +- files_changed: {files_changed or "none"} +- commands_run: {commands_run or "none"} +- verifier_result: {verifier_result or "not_run"} +- next_steps: {next_steps or "none"} +""" + append_text(root / "memory" / "daily" / f"{day}.md", daily_entry) + + project_status = f"""# Project Status + +- project: openclaw-multi-agent-os +- state: active +- openclaw_config: .openclaw +- multi_agent_root: .multi-agent +- last_task: {task_id} +- last_task_type: {task_type} +- last_summary: {summary} +- last_verifier_result: {verifier_result or "not_run"} +- updated_at: {now.isoformat(timespec="seconds")} +""" + write_text(root / "memory" / "project" / "status.md", project_status) + + verifier_lower = verifier_result.lower() + if "fail" in verifier_lower: + failure = f"""# Failure: {task_id} + +- date: {now.isoformat(timespec="seconds")} +- task_type: {task_type} +- failure: {summary} +- verifier_result: {verifier_result} +- files_changed: {files_changed or "none"} +- commands_run: {commands_run or "none"} +- next_steps: {next_steps or "inspect failed checks and rerun workflow"} +""" + write_text(root / "memory" / "failures" / f"{safe_slug(task_id)}.md", failure) + + if task_type == "architecture": + decision = f"""# Architecture Decision: {task_id} + +- date: {now.isoformat(timespec="seconds")} +- context: {summary} +- decision: recorded architecture workflow result +- alternatives: see run logs and Hermes review +- risks: 
see Guard decision and Verifier result +- verification: {verifier_result or "not_run"} +""" + write_text(root / "memory" / "decisions" / f"{safe_slug(task_id)}.md", decision) + + if args.lesson.strip(): + lesson = f"""# Lesson: {task_id} + +- date: {now.isoformat(timespec="seconds")} +- source_task: {task_id} +- lesson: {redact(args.lesson)} +- applies_to: {task_type} +- avoid: repeating known failure modes without verification +""" + write_text(root / "memory" / "lessons" / f"{safe_slug(task_id)}.md", lesson) + + print(f"memory_written: {root / 'memory' / 'daily' / (day + '.md')}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) + diff --git a/multi_agent/workflows/analysis.yaml b/multi_agent/workflows/analysis.yaml new file mode 100644 index 0000000..105cab0 --- /dev/null +++ b/multi_agent/workflows/analysis.yaml @@ -0,0 +1,15 @@ +name: analysis +description: Read-only investigation and planning workflow. +stages: + - main + - router + - scout + - analyst + - guard + - memory +execution: + writes_project_files: false + requires_guard: true + requires_verifier: false + requires_hermes: false + diff --git a/multi_agent/workflows/architecture.yaml b/multi_agent/workflows/architecture.yaml new file mode 100644 index 0000000..6908212 --- /dev/null +++ b/multi_agent/workflows/architecture.yaml @@ -0,0 +1,19 @@ +name: architecture +description: Complex architecture, refactor, or multi-file workflow. +stages: + - main + - router + - scout + - analyst + - hermes_reviewer + - analyst_revision + - guard + - executor + - verifier + - memory +execution: + writes_project_files: true + requires_guard: true + requires_verifier: true + requires_hermes: true + diff --git a/multi_agent/workflows/debug.yaml b/multi_agent/workflows/debug.yaml new file mode 100644 index 0000000..d81387d --- /dev/null +++ b/multi_agent/workflows/debug.yaml @@ -0,0 +1,17 @@ +name: debug +description: Bug investigation and repair workflow. 
+stages: + - main + - router + - scout + - analyst + - guard + - executor + - verifier + - memory +execution: + writes_project_files: true + requires_guard: true + requires_verifier: true + requires_hermes: false + diff --git a/multi_agent/workflows/info.yaml b/multi_agent/workflows/info.yaml new file mode 100644 index 0000000..9cdfd53 --- /dev/null +++ b/multi_agent/workflows/info.yaml @@ -0,0 +1,13 @@ +name: info +description: Read-only information workflow. +stages: + - main + - router + - scout + - memory +execution: + writes_project_files: false + requires_guard: false + requires_verifier: false + requires_hermes: false + diff --git a/multi_agent/workflows/modify.yaml b/multi_agent/workflows/modify.yaml new file mode 100644 index 0000000..b30c32b --- /dev/null +++ b/multi_agent/workflows/modify.yaml @@ -0,0 +1,17 @@ +name: modify +description: Normal low-to-medium risk modification workflow. +stages: + - main + - router + - scout + - analyst + - guard + - executor + - verifier + - memory +execution: + writes_project_files: true + requires_guard: true + requires_verifier: true + requires_hermes: false + diff --git a/multi_agent/workflows/risky.yaml b/multi_agent/workflows/risky.yaml new file mode 100644 index 0000000..787c8f4 --- /dev/null +++ b/multi_agent/workflows/risky.yaml @@ -0,0 +1,18 @@ +name: risky +description: High-risk workflow that must stop for explicit user confirmation. 
+stages: + - main + - router + - scout + - analyst + - hermes_reviewer + - guard + - ask_user + - memory +execution: + writes_project_files: false + requires_guard: true + requires_verifier: false + requires_hermes: true + requires_user_confirm: true + diff --git a/templates/.multi-agent/config.yaml.example b/templates/.multi-agent/config.yaml.example new file mode 100644 index 0000000..44a74c2 --- /dev/null +++ b/templates/.multi-agent/config.yaml.example @@ -0,0 +1,94 @@ +# ============================================================================= +# Multi Agent Lab — Configuration Template +# ============================================================================= +# Copy this file to multi_agent/config.yaml and fill in your values. +# NEVER commit a config.yaml with real API keys, tokens, or secrets. +# ============================================================================= + +project: + # Human-readable project name + name: "my-multi-agent-lab" + # Root of the project (where this config lives) + root: "." + # Where executor is allowed to write files (relative to root) + workspace: "multi_agent/workspace" + # Path to your local openclaw config (typically ~/.openclaw or custom) + openclaw_config: "~/.openclaw" + +# ============================================================================= +# Backend Tool Detection +# ============================================================================= +# Set required: false for optional backends. +# The system works with just shell (bash/python3) — others are optional. +# ============================================================================= + +tools: + openclaw: + # Command to invoke openclaw CLI. Must be in PATH or use absolute path. + command: "openclaw" + required: false + hermes: + # Command to invoke hermes CLI. + command: "hermes" + required: false + claude_code: + # Command to invoke Claude Code CLI. 
+ command: "claude" + required: false + +# ============================================================================= +# Safety Policy +# ============================================================================= + +policy: + # Allow file writes (controlled by workspace boundary) + allow_file_write: true + # Allow file deletion (not recommended to enable) + allow_delete: false + # Allow modification of system paths (/etc, /usr, /var, /opt) + allow_system_path_modify: false + # Guard MUST approve every execution before Executor runs + require_guard_before_execute: true + # Verifier MUST check output after Executor completes + require_verifier_after_execute: true + # Memory Manager writes a record after every task + require_memory_after_task: true + +# ============================================================================= +# Workflow Definitions +# ============================================================================= +# Each workflow type maps to a YAML file that defines the agent pipeline. +# Paths are relative to the project root. +# ============================================================================= + +workflows: + info: "multi_agent/workflows/info.yaml" + analysis: "multi_agent/workflows/analysis.yaml" + modify: "multi_agent/workflows/modify.yaml" + debug: "multi_agent/workflows/debug.yaml" + architecture: "multi_agent/workflows/architecture.yaml" + risky: "multi_agent/workflows/risky.yaml" + +# ============================================================================= +# Memory Layout +# ============================================================================= +# These paths define where memory records are stored. +# They are created automatically if they don't exist. 
+# ============================================================================= + +memory: + daily: "multi_agent/memory/daily" + project: "multi_agent/memory/project" + decisions: "multi_agent/memory/decisions" + failures: "multi_agent/memory/failures" + lessons: "multi_agent/memory/lessons" + +# ============================================================================= +# Log Storage +# ============================================================================= +# Runtime logs are stored here. These directories MUST NOT be committed. +# ============================================================================= + +logs: + runs: "multi_agent/logs/runs" + messages: "multi_agent/logs/messages" diff --git a/templates/env.example b/templates/env.example new file mode 100644 index 0000000..58bad65 --- /dev/null +++ b/templates/env.example @@ -0,0 +1,42 @@ +# ============================================================================= +# Environment Variables Template +# ============================================================================= +# Copy this file to .env in the project root and fill in your values. +# The .env file is in .gitignore — NEVER commit it with real values. 
+# ============================================================================= + +# ----------------------------------------------------------------------------- +# OpenClaw Configuration +# ----------------------------------------------------------------------------- +# Path to your openclaw config directory +# export OPENCLAW_CONFIG_PATH="$HOME/.openclaw" + +# Model to use for openclaw agent calls (format: provider/model) +# export OPENCLAW_MODEL="minimax-m2-7-highspeed/MiniMax-M2.7-highspeed" + +# ----------------------------------------------------------------------------- +# Hermes CLI (optional — used for Hermes Reviewer adapter) +# ----------------------------------------------------------------------------- +# export HERMES_CONFIG_PATH="$HOME/.config/hermes" + +# ----------------------------------------------------------------------------- +# Claude Code (optional — used for claude_code adapter) +# ----------------------------------------------------------------------------- +# export CLAUDE_CODE_API_KEY="sk-ant-..." + +# ----------------------------------------------------------------------------- +# Custom LLM Providers (optional) +# ----------------------------------------------------------------------------- +# If you have your own API-compatible model endpoint, add it here. 
+# export CUSTOM_LLM_BASE_URL="https://api.example.com/v1" +# export CUSTOM_LLM_API_KEY="sk-your-key-here" +# export CUSTOM_LLM_MODEL="your-model-id" + +# ----------------------------------------------------------------------------- +# Debug / Development +# ----------------------------------------------------------------------------- +# Set to 1 to enable verbose script output +# export DEBUG=0 + +# Set to 1 to dry-run all workflows (skip executor) +# export DRY_RUN=1 diff --git a/tests/smoke_public.sh b/tests/smoke_public.sh index 75fee64..9ee1562 100755 --- a/tests/smoke_public.sh +++ b/tests/smoke_public.sh @@ -8,7 +8,7 @@ echo # Check root files echo "[1] Checking root files..." -for f in README.md LICENSE CONTRIBUTING.md ROADMAP.md SECURITY.md .gitignore; do +for f in README.md LICENSE .gitignore; do if [[ -f "$f" ]]; then echo " OK: $f" else @@ -19,31 +19,35 @@ done # Check core skeleton files echo -echo "[2] Checking .multi-agent core files..." +echo "[2] Checking multi_agent/ core files..." 
for f in \ - .multi-agent/README.md \ - .multi-agent/config.yaml \ - .multi-agent/agents/main.md \ - .multi-agent/agents/router.md \ - .multi-agent/agents/scout.md \ - .multi-agent/agents/analyst.md \ - .multi-agent/agents/guard.md \ - .multi-agent/agents/executor.md \ - .multi-agent/agents/verifier.md \ - .multi-agent/agents/memory_manager.md \ - .multi-agent/workflows/info.yaml \ - .multi-agent/workflows/modify.yaml \ - .multi-agent/workflows/analysis.yaml \ - .multi-agent/workflows/debug.yaml \ - .multi-agent/workflows/architecture.yaml \ - .multi-agent/workflows/risky.yaml \ - .multi-agent/scripts/run_workflow.sh \ - .multi-agent/scripts/verify.py \ - .multi-agent/scripts/guard_check.py \ - .multi-agent/adapters/openclaw_adapter.md \ - .multi-agent/adapters/hermes_adapter.md \ - .multi-agent/adapters/claude_code_adapter.md \ - .multi-agent/memory/templates/daily.md; do + multi_agent/README.md \ + multi_agent/config.yaml \ + multi_agent/agents/main.md \ + multi_agent/agents/router.md \ + multi_agent/agents/scout.md \ + multi_agent/agents/analyst.md \ + multi_agent/agents/guard.md \ + multi_agent/agents/executor.md \ + multi_agent/agents/verifier.md \ + multi_agent/agents/memory_manager.md \ + multi_agent/workflows/info.yaml \ + multi_agent/workflows/modify.yaml \ + multi_agent/workflows/analysis.yaml \ + multi_agent/workflows/debug.yaml \ + multi_agent/workflows/architecture.yaml \ + multi_agent/workflows/risky.yaml \ + multi_agent/scripts/run_workflow.sh \ + multi_agent/scripts/verify.py \ + multi_agent/scripts/guard_check.py \ + multi_agent/adapters/openclaw_adapter.md \ + multi_agent/adapters/hermes_adapter.md \ + multi_agent/adapters/claude_code_adapter.md \ + multi_agent/adapters/shell_adapter.md \ + multi_agent/memory/templates/daily.md \ + multi_agent/memory/templates/decision.md \ + multi_agent/memory/templates/failure.md \ + multi_agent/memory/templates/lesson.md; do if [[ -f "$f" ]]; then echo " OK: $f" else @@ -97,7 +101,7 @@ fi echo echo "[6] 
Checking bash script syntax..." shopt -s nullglob -for f in .multi-agent/scripts/*.sh tests/*.sh; do +for f in multi_agent/scripts/*.sh tests/*.sh; do if bash -n "$f" 2>/dev/null; then echo " OK: $f" else @@ -111,7 +115,7 @@ shopt -u nullglob echo echo "[7] Checking Python script syntax..." shopt -s nullglob -for f in .multi-agent/scripts/*.py tests/*.py; do +for f in multi_agent/scripts/*.py tests/*.py; do if python3 -m py_compile "$f" 2>/dev/null; then echo " OK: $f" else @@ -125,10 +129,10 @@ shopt -u nullglob echo echo "[8] Checking forbidden directories not tracked by git..." for dir in \ - .multi-agent/logs \ - .multi-agent/memory/daily \ - .multi-agent/memory/failures \ - .multi-agent/memory/decisions; do + multi_agent/logs \ + multi_agent/memory/daily \ + multi_agent/memory/failures \ + multi_agent/memory/decisions; do tracked=$(git ls-files --error-unmatch "$dir" 2>/dev/null && echo YES || echo NO) if [[ "$tracked" == "YES" ]]; then echo " TRACKED (should not be): $dir" @@ -142,11 +146,11 @@ done echo echo "[9] Checking .gitignore coverage..." required_patterns=( - ".multi-agent/logs/" - ".multi-agent/memory/daily/" - ".multi-agent/memory/failures/" - ".multi-agent/memory/decisions/" - ".multi-agent/workspace/" + "logs/" + "memory/daily/" + "memory/failures/" + "memory/decisions/" + "workspace/" ".env" ".ssh/" ".openclaw/" @@ -163,7 +167,6 @@ done # Check README has no obviously fake/inflated claims echo echo "[10] Checking README honesty..." -# Allow "not a production system" (honest disclaimer) but flag inflated claims if grep -qi "fully functional\|production-ready\|fully integrated\|complete system" README.md 2>/dev/null; then echo " WARNING: README may contain inflated claims" exit 1 @@ -171,15 +174,31 @@ else echo " OK: README is appropriately modest" fi -# Check MiMo doc clearly marks integration as planned +# Check model integration doc status echo -echo "[11] Checking MiMo doc status..." +echo "[11] Checking model integration doc status..." 
if grep -qi "not.*integrat\|planned\|target\|future\|aspirational" docs/model-integration-roadmap.md 2>/dev/null; then - echo " OK: MiMo integration is clearly marked as planned" + echo " OK: model integration is clearly marked as planned" else - echo " WARNING: MiMo integration status unclear in docs/mimo-orbit.md" + echo " WARNING: model integration status unclear" exit 1 fi +# Check no real API keys / tokens / secrets are present +echo +echo "[12] Scanning for leaked secrets..." +LEAKED=$(grep -rEl \ + "sk-[a-zA-Z0-9]{20,}|AIza[a-zA-Z0-9_-]{35,}|ghp_[a-zA-Z0-9]{36,}" \ + --include="*.md" --include="*.sh" --include="*.py" \ + --include="*.yaml" --include="*.yml" \ + . 2>/dev/null | grep -v ".git/" || true) +if [[ -n "$LEAKED" ]]; then + echo " LEAKED SECRETS FOUND:" + echo "$LEAKED" + exit 1 +else + echo " OK: no obvious API keys/tokens detected" +fi + echo echo "=== public smoke test passed ===" From 713cec04625b3f2816f0dd267f52f9102292de58 Mon Sep 17 00:00:00 2001 From: le Date: Wed, 29 Apr 2026 16:14:01 +0800 Subject: [PATCH 5/6] add .gitattributes to enforce LF line endings on all text files --- .gitattributes | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c9fdfbe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,8 @@ + +# Force explicit line-ending handling for these file types +*.sh text eol=lf +*.py text eol=lf +*.yaml text eol=lf +*.yml text eol=lf +*.md text eol=lf +.gitignore text eol=lf From bd5ad95d3b2b619b03169304cd975b6421bbec1e Mon Sep 17 00:00:00 2001 From: le Date: Wed, 29 Apr 2026 16:30:02 +0800 Subject: [PATCH 6/6] =?UTF-8?q?fix=20public=20skeleton=20path=20references?= =?UTF-8?q?:=20.multi-agent=20=E2=86=92=20multi=5Fagent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- multi_agent/adapters/openclaw_adapter.md | 2 +- multi_agent/scripts/run_workflow.sh | 12 ++++++------ 
multi_agent/scripts/verify.py | 2 +- multi_agent/scripts/write_memory.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/multi_agent/adapters/openclaw_adapter.md b/multi_agent/adapters/openclaw_adapter.md index 7c947c7..e184f10 100644 --- a/multi_agent/adapters/openclaw_adapter.md +++ b/multi_agent/adapters/openclaw_adapter.md @@ -1,6 +1,6 @@ # OpenClaw Adapter -OpenClaw 是总调度入口。`.multi-agent` 不破坏已有 `.openclaw/`,只通过配置引用它。 +OpenClaw 是总调度入口。`multi_agent/` 不破坏已有 `.openclaw/`,只通过配置引用它。 ## 角色 diff --git a/multi_agent/scripts/run_workflow.sh b/multi_agent/scripts/run_workflow.sh index bad027a..f925a10 100755 --- a/multi_agent/scripts/run_workflow.sh +++ b/multi_agent/scripts/run_workflow.sh @@ -225,7 +225,7 @@ reason: classified from explicit --type and goal keywords EOF log "stage router: $ROUTER_FILE" -KEY_FILES="$(find .multi-agent -maxdepth 2 -type f | sort | head -n 80)" +KEY_FILES="$(find multi_agent -maxdepth 2 -type f | sort | head -n 80)" OPENCLAW_STATE="missing" if [[ -d ".openclaw" ]]; then OPENCLAW_STATE="present at .openclaw" @@ -238,17 +238,17 @@ cat > "$SCOUT_FILE" </dev/null || printf 'no') - 疑似问题: - none from read-only scout - 交给 Analyst 的信息: - - Use .multi-agent as the bounded orchestration layer. + - Use multi_agent as the bounded orchestration layer. - Do not modify .openclaw unless a later plan explicitly scopes it. 
EOF AGENTS_USED="$AGENTS_USED,scout" @@ -294,7 +294,7 @@ elif [[ "$TASK_TYPE" == "architecture" ]]; then - multi_agent/scripts/*.py - multi_agent/README.md" PLAN_COMMANDS="- execute only after Hermes approve or Analyst revision -- keep changes within .multi-agent" +- keep changes within multi_agent" PLAN_VERIFY="- run smoke test - inspect logs and memory output" HERMES_REQUIRED="yes" @@ -378,7 +378,7 @@ if [[ "$TASK_TYPE" == "architecture" || "$TASK_TYPE" == "risky" ]]; then - revision_status: incorporated - changes: - - tightened execution scope to .multi-agent only + - tightened execution scope to multi_agent only - kept verification explicit - kept deletion and system-path changes out of scope - analyst_revision: true diff --git a/multi_agent/scripts/verify.py b/multi_agent/scripts/verify.py index 3e8e12f..b6b52c3 100755 --- a/multi_agent/scripts/verify.py +++ b/multi_agent/scripts/verify.py @@ -41,7 +41,7 @@ def parse_create_workspace_goal(goal: str) -> tuple[pathlib.Path | None, str | N if "/" in filename: return None, None, "filename must not contain path separators" - return pathlib.Path(".multi-agent") / "workspace" / filename, expected, None + return pathlib.Path("multi_agent") / "workspace" / filename, expected, None def main() -> int: diff --git a/multi_agent/scripts/write_memory.py b/multi_agent/scripts/write_memory.py index ef79025..b60f133 100755 --- a/multi_agent/scripts/write_memory.py +++ b/multi_agent/scripts/write_memory.py @@ -46,7 +46,7 @@ def main() -> int: parser.add_argument("--verifier-result", default="") parser.add_argument("--next-steps", default="") parser.add_argument("--lesson", default="") - parser.add_argument("--root", default=".multi-agent") + parser.add_argument("--root", default="multi_agent") args = parser.parse_args() root = pathlib.Path(args.root) @@ -81,7 +81,7 @@ def main() -> int: - project: openclaw-multi-agent-os - state: active - openclaw_config: .openclaw -- multi_agent_root: .multi-agent +- multi_agent_root: 
multi_agent - last_task: {task_id} - last_task_type: {task_type} - last_summary: {summary}