diff --git a/README.md b/README.md index 15149fad..a1a0939b 100644 --- a/README.md +++ b/README.md @@ -742,6 +742,7 @@ docker-compose up - AgentSkill delegated authority fixtures guide: [`docs/guides/agentskill-authz-interop-fixtures.md`](docs/guides/agentskill-authz-interop-fixtures.md) - action_ref identity/explainability profile: [`docs/specs/action-ref-identity-explainability-profile.md`](docs/specs/action-ref-identity-explainability-profile.md) - Payment governance profile (Economic Layer v1): [`docs/specs/payment-governance-profile-v1.md`](docs/specs/payment-governance-profile-v1.md) +- Agent commerce governance profile: [`docs/specs/agent-commerce-governance-profile-v1.md`](docs/specs/agent-commerce-governance-profile-v1.md) - OpenAI Agents SDK governance guide: [`docs/guides/openai-agents-sdk-integration.md`](docs/guides/openai-agents-sdk-integration.md) - Cursor integration guide: [`docs/guides/cursor-integration.md`](docs/guides/cursor-integration.md) - Benchmark report: [`docs/reports/industrial-benchmark-report.md`](docs/reports/industrial-benchmark-report.md) diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index cb81637c..f09615b9 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -94,6 +94,16 @@ export default defineConfig({ { text: "Collaboration Reply Playbook", link: "/community/open-source-collaboration-replies" }, ], }, + { + text: "Specs", + items: [ + { text: "Agent Commerce Governance Profile", link: "/specs/agent-commerce-governance-profile-v1" }, + { text: "Payment Governance Profile", link: "/specs/payment-governance-profile-v1" }, + { text: "Action Ref Explainability Profile", link: "/specs/action-ref-identity-explainability-profile" }, + { text: "APS-SINT Handshake", link: "/specs/aps-sint-handshake-v1" }, + { text: "SINT Industrial Action Profile", link: "/specs/sint-industrial-action-profile" }, + ], + }, { text: "Roadmaps", items: [ diff --git a/docs/index.md b/docs/index.md index 
cb8fb1cb..152d4ed9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -59,6 +59,7 @@ limits, and tamper-evident audit evidence before execution. - ISO 13482 alignment: [Compliance/ISO 13482](./compliance/iso-13482-alignment.md) - Formal threat model: [Security/Formal Threat Model](./security/formal-threat-model.md) - MITRE ATLAS candidate mappings: [Security/MITRE ATLAS](./security/mitre-atlas-agent-technique-mappings.md) +- Agent commerce governance profile: [Spec](./specs/agent-commerce-governance-profile-v1.md) - NIST submission bundle report: [Report](./reports/nist-submission-bundle.md) - Latest security bulletin: [May 2026](./security-bulletins/2026-05.md) diff --git a/docs/marketing-message-map.md b/docs/marketing-message-map.md index 34a853c2..bf9bcfe6 100644 --- a/docs/marketing-message-map.md +++ b/docs/marketing-message-map.md @@ -69,6 +69,12 @@ Every decision is captured in a tamper-evident, hash-chained ledger for audit an SINT works across execution surfaces such as MCP, A2A, ROS 2, MAVLink, MQTT/Sparkplug, OPC UA, Open-RMF, and gRPC. +### 6. Agent commerce + +SINT can govern task-market and machine-payment workflows before work or money +moves: task creation, bids, claims, benchmark proof submission, worker +selection, settlement release, and x402-style payment permits. 
+ ## Proof points to emphasize - Apache-2.0 licensed @@ -90,6 +96,8 @@ SINT works across execution surfaces such as MCP, A2A, ROS 2, MAVLink, MQTT/Spar - "approval tiers" - "physical AI governance" - "industrial AI safety" +- "agent commerce governance" +- "x402 policy enforcement" - "tamper-evident evidence ledger" - "open protocol and reference stack" - "real-world consequences" diff --git a/docs/specs/agent-commerce-governance-profile-v1.md b/docs/specs/agent-commerce-governance-profile-v1.md new file mode 100644 index 00000000..830edd11 --- /dev/null +++ b/docs/specs/agent-commerce-governance-profile-v1.md @@ -0,0 +1,110 @@ +# Agent Commerce Governance Profile v1 + +Status: executable conformance profile + +This profile defines a transport-neutral governance contract for agent-to-agent +task markets, x402-style machine payments, and machine-router workflows. It is +inspired by current task-market patterns such as bounty, claim, pitch, +benchmark, and auction modes, but it is not vendor-specific. + +## Scope + +Agent commerce systems need two separate controls: + +1. **Market state control**: an agent should only create, bid, claim, submit, + accept, rate, or settle a task when the task state and agent authority allow + that transition. +2. **Payment control**: an agent should only sign or submit x402-style payment + permits when spend caps, expiry, recipient, session, and policy limits are + valid. + +SINT provides the pre-action policy boundary for both. Marketplaces remain free +to implement escrow, identity registries, reputation, and settlement rails; SINT +defines the observable policy outcomes that must happen before execution. 
+ +## Governed Actions + +The profile uses these resource and action names: + +| Resource | Action | Typical tier | +|---|---|---| +| `market://task/*` | `create` | T1 or T2 when reward is high | +| `market://task/*` | `pitch` | T1 | +| `market://task/*` | `bid` | T1 or T2 when paid | +| `market://task/*` | `claim` | T2 when exclusive or staked | +| `market://task/*` | `submit` | T1 | +| `market://task/*` | `submit_proof` | T1 | +| `market://task/*` | `select_worker` | T2 | +| `market://task/*` | `accept` | T3 when payment releases | +| `market://settlement/*` | `release` | T3 | +| `x402://session/*` | `authorize_permit` | T2 or T3 when cap is high | + +## Required Controls + +1. Agent identity must be registered before reputation-bearing work. +2. Capability scope must include the requested market action. +3. Task-mode state transitions must be monotonic and valid. +4. Exclusive claim tasks must deny non-selected workers. +5. Auction bids must satisfy the active price-discovery rule. +6. Benchmark submissions must include a proof digest and metric value. +7. x402 permits must enforce cap, expiry, recipient, and session constraints. +8. Settlement must only release after accepted work and must reject receipt + replay. +9. High-value task creation or settlement must escalate to human approval. +10. Every deny, escalate, worker-selection, accept, and release decision must + carry an evidence reference. 
+ +## Decision Reasons + +The executable fixtures use the following stable outcome reasons: + +- `ALLOW` +- `AGENT_IDENTITY_REQUIRED` +- `SCOPE_NOT_AUTHORIZED` +- `REPUTATION_BELOW_THRESHOLD` +- `VALUE_REQUIRES_APPROVAL` +- `STATE_TRANSITION_INVALID` +- `BID_NOT_COMPETITIVE` +- `PROOF_REQUIRED` +- `X402_CAP_EXCEEDED` +- `X402_PERMIT_EXPIRED` +- `RECIPIENT_NOT_ALLOWLISTED` +- `SETTLEMENT_STATE_INVALID` +- `RECEIPT_REPLAY` + +## Fixture Contract + +Fixture file: + +- `packages/conformance-tests/fixtures/economy/agent-commerce-governance.v1.json` + +Executable test: + +- `packages/conformance-tests/src/agent-commerce-governance-conformance.test.ts` + +Run: + +```bash +pnpm --filter @pshkv/conformance-tests exec vitest run src/agent-commerce-governance-conformance.test.ts +``` + +## Reference Systems + +The profile is designed to map cleanly onto emerging agent-commerce systems: + +- Task modes such as bounty, claim, pitch, benchmark, and auction: + `https://docs-market.daydreams.systems/concepts/task-modes` +- x402 permit/session routing with spend caps and expiry: + `https://router.daydreams.systems/how-it-works` +- scoped device signing and revocation: + `https://docs-market.daydreams.systems/identity/device-setup` +- ERC-8004 style agent identity and reputation linkage: + `https://docs-market.daydreams.systems/identity/agent-registration` + +## Non-Goals + +- This profile does not implement escrow, auctions, or x402 settlement. +- This profile does not claim compatibility with any specific marketplace API. +- This profile does not define a new payment rail. + +It defines the policy contract SINT can enforce before those systems execute. 
diff --git a/packages/conformance-tests/fixtures/economy/agent-commerce-governance.v1.json b/packages/conformance-tests/fixtures/economy/agent-commerce-governance.v1.json new file mode 100644 index 00000000..1d6463aa --- /dev/null +++ b/packages/conformance-tests/fixtures/economy/agent-commerce-governance.v1.json @@ -0,0 +1,499 @@ +{ + "fixtureId": "sint.economy.agent-commerce-governance.v1", + "schemaVersion": "2026-05-29", + "description": "Executable governance fixture for agent-to-agent task markets and x402-style machine payments: identity, scoped market actions, task-mode transitions, reputation gates, auction rules, benchmark proofs, x402 spend caps, and settlement receipts.", + "profile": { + "resources": [ + "market://task/*", + "market://settlement/*", + "x402://session/*" + ], + "actions": [ + "create", + "pitch", + "bid", + "claim", + "submit", + "submit_proof", + "select_worker", + "accept", + "release", + "authorize_permit" + ], + "taskModes": [ + "bounty", + "claim", + "pitch", + "benchmark", + "auction" + ], + "decisionReasons": [ + "ALLOW", + "AGENT_IDENTITY_REQUIRED", + "SCOPE_NOT_AUTHORIZED", + "REPUTATION_BELOW_THRESHOLD", + "VALUE_REQUIRES_APPROVAL", + "STATE_TRANSITION_INVALID", + "BID_NOT_COMPETITIVE", + "PROOF_REQUIRED", + "X402_CAP_EXCEEDED", + "X402_PERMIT_EXPIRED", + "RECIPIENT_NOT_ALLOWLISTED", + "SETTLEMENT_STATE_INVALID", + "RECEIPT_REPLAY" + ], + "evidenceRequiredFor": [ + "deny", + "escalate", + "select_worker", + "accept", + "release" + ] + }, + "defaults": { + "referenceTime": "2026-05-29T12:00:00.000Z", + "minReputationScore": 0.7, + "highValueApprovalThresholdUsdc": 50, + "maxX402SessionCapUsdc": 25, + "maxPermitExpiryMinutes": 60, + "allowedRecipients": [ + "merchant://taskmarket-escrow", + "merchant://x402-router", + "agent://requester-alpha", + "agent://worker-verified" + ], + "registeredAgents": [ + "agent://requester-alpha", + "agent://worker-verified", + "agent://worker-new" + ], + "reputationByAgent": { + 
"agent://requester-alpha": 0.93, + "agent://worker-verified": 0.88, + "agent://worker-new": 0.41 + }, + "usedSettlementReceiptIds": [ + "settlement-receipt-duplicate" + ] + }, + "cases": [ + { + "name": "allow bounty task creation within value threshold", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://task/*"], + "actions": ["create"] + }, + "request": { + "resource": "market://task/t-001", + "action": "create", + "task": { + "id": "t-001", + "mode": "bounty", + "status": "draft", + "rewardUsdc": 12, + "recipient": "merchant://taskmarket-escrow" + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "open", + "evidenceRequired": false + } + }, + { + "name": "escalate high value benchmark creation", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://task/*"], + "actions": ["create"] + }, + "request": { + "resource": "market://task/t-002", + "action": "create", + "task": { + "id": "t-002", + "mode": "benchmark", + "status": "draft", + "rewardUsdc": 125, + "recipient": "merchant://taskmarket-escrow", + "metricDescription": "lowest p95 latency with passing tests", + "metricTarget": "p95_ms <= 250" + } + }, + "expected": { + "decision": "escalate", + "reason": "VALUE_REQUIRES_APPROVAL", + "requiredTier": "T3_COMMIT", + "nextStatus": "open", + "evidenceRequired": true + } + }, + { + "name": "deny unregistered worker claim", + "principal": "agent://worker-unregistered", + "capability": { + "resources": ["market://task/*"], + "actions": ["claim"] + }, + "request": { + "resource": "market://task/t-003", + "action": "claim", + "task": { + "id": "t-003", + "mode": "claim", + "status": "open", + "rewardUsdc": 8, + "stakeRequiredUsdc": 1 + } + }, + "expected": { + "decision": "deny", + "reason": "AGENT_IDENTITY_REQUIRED", + "evidenceRequired": true + } + }, + { + "name": "deny low reputation exclusive claim", + "principal": "agent://worker-new", + "capability": { + 
"resources": ["market://task/*"], + "actions": ["claim"] + }, + "request": { + "resource": "market://task/t-004", + "action": "claim", + "task": { + "id": "t-004", + "mode": "claim", + "status": "open", + "rewardUsdc": 10, + "stakeRequiredUsdc": 2 + } + }, + "expected": { + "decision": "deny", + "reason": "REPUTATION_BELOW_THRESHOLD", + "evidenceRequired": true + } + }, + { + "name": "allow verified worker claim", + "principal": "agent://worker-verified", + "capability": { + "resources": ["market://task/*"], + "actions": ["claim"] + }, + "request": { + "resource": "market://task/t-005", + "action": "claim", + "task": { + "id": "t-005", + "mode": "claim", + "status": "open", + "rewardUsdc": 10, + "stakeRequiredUsdc": 2 + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "claimed", + "evidenceRequired": false + } + }, + { + "name": "allow pitch submission without payment", + "principal": "agent://worker-verified", + "capability": { + "resources": ["market://task/*"], + "actions": ["pitch"] + }, + "request": { + "resource": "market://task/t-006", + "action": "pitch", + "task": { + "id": "t-006", + "mode": "pitch", + "status": "open", + "rewardUsdc": 20 + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "open", + "evidenceRequired": false + } + }, + { + "name": "allow requester to select pitch worker with evidence", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://task/*"], + "actions": ["select_worker"] + }, + "request": { + "resource": "market://task/t-007", + "action": "select_worker", + "task": { + "id": "t-007", + "mode": "pitch", + "status": "open", + "rewardUsdc": 20, + "selectedWorker": "agent://worker-verified" + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "worker_selected", + "evidenceRequired": true + } + }, + { + "name": "deny auction bid that does not undercut current lowest bid", + "principal": 
"agent://worker-verified", + "capability": { + "resources": ["market://task/*"], + "actions": ["bid"] + }, + "request": { + "resource": "market://task/t-008", + "action": "bid", + "task": { + "id": "t-008", + "mode": "auction", + "auctionType": "english", + "status": "open", + "rewardUsdc": 20, + "maxPriceUsdc": 20, + "currentLowestBidUsdc": 7 + }, + "bid": { + "priceUsdc": 8 + } + }, + "expected": { + "decision": "deny", + "reason": "BID_NOT_COMPETITIVE", + "evidenceRequired": true + } + }, + { + "name": "deny benchmark submission without proof digest", + "principal": "agent://worker-verified", + "capability": { + "resources": ["market://task/*"], + "actions": ["submit_proof"] + }, + "request": { + "resource": "market://task/t-009", + "action": "submit_proof", + "task": { + "id": "t-009", + "mode": "benchmark", + "status": "open", + "rewardUsdc": 15 + }, + "proof": { + "type": "benchmark", + "metricName": "p95_latency_ms", + "metricValue": 212 + } + }, + "expected": { + "decision": "deny", + "reason": "PROOF_REQUIRED", + "evidenceRequired": true + } + }, + { + "name": "allow benchmark submission with digest and metric", + "principal": "agent://worker-verified", + "capability": { + "resources": ["market://task/*"], + "actions": ["submit_proof"] + }, + "request": { + "resource": "market://task/t-010", + "action": "submit_proof", + "task": { + "id": "t-010", + "mode": "benchmark", + "status": "open", + "rewardUsdc": 15 + }, + "proof": { + "type": "benchmark", + "metricName": "p95_latency_ms", + "metricValue": 184, + "digest": "sha256:2eb0c9f6a12a5e42b15b50f63783c2f401de42ccbb6ca2b1e5ceef07d0e2b2f7" + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "pending_approval", + "evidenceRequired": false + } + }, + { + "name": "allow requester accept low-value work with evidence", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://task/*"], + "actions": ["accept"] + }, + "request": { + "resource": 
"market://task/t-011", + "action": "accept", + "task": { + "id": "t-011", + "mode": "bounty", + "status": "pending_approval", + "rewardUsdc": 12, + "selectedWorker": "agent://worker-verified" + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "accepted", + "evidenceRequired": true + } + }, + { + "name": "deny x402 permit above session cap", + "principal": "agent://worker-verified", + "capability": { + "resources": ["x402://session/*"], + "actions": ["authorize_permit"] + }, + "request": { + "resource": "x402://session/s-012", + "action": "authorize_permit", + "x402": { + "sessionId": "s-012", + "recipient": "merchant://x402-router", + "permitCapUsdc": 30, + "expiresAt": "2026-05-29T12:30:00.000Z" + } + }, + "expected": { + "decision": "deny", + "reason": "X402_CAP_EXCEEDED", + "evidenceRequired": true + } + }, + { + "name": "deny expired x402 permit", + "principal": "agent://worker-verified", + "capability": { + "resources": ["x402://session/*"], + "actions": ["authorize_permit"] + }, + "request": { + "resource": "x402://session/s-013", + "action": "authorize_permit", + "x402": { + "sessionId": "s-013", + "recipient": "merchant://x402-router", + "permitCapUsdc": 5, + "expiresAt": "2026-05-29T11:59:59.000Z" + } + }, + "expected": { + "decision": "deny", + "reason": "X402_PERMIT_EXPIRED", + "evidenceRequired": true + } + }, + { + "name": "deny settlement before accepted task state", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://settlement/*"], + "actions": ["release"] + }, + "request": { + "resource": "market://settlement/t-014", + "action": "release", + "task": { + "id": "t-014", + "mode": "bounty", + "status": "pending_approval", + "rewardUsdc": 12, + "selectedWorker": "agent://worker-verified" + }, + "settlement": { + "recipient": "agent://worker-verified", + "receiptId": "settlement-receipt-009" + } + }, + "expected": { + "decision": "deny", + "reason": "SETTLEMENT_STATE_INVALID", + 
"evidenceRequired": true + } + }, + { + "name": "deny replayed settlement receipt", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://settlement/*"], + "actions": ["release"] + }, + "request": { + "resource": "market://settlement/t-015", + "action": "release", + "task": { + "id": "t-015", + "mode": "bounty", + "status": "accepted", + "rewardUsdc": 12, + "selectedWorker": "agent://worker-verified" + }, + "settlement": { + "recipient": "agent://worker-verified", + "receiptId": "settlement-receipt-duplicate" + } + }, + "expected": { + "decision": "deny", + "reason": "RECEIPT_REPLAY", + "evidenceRequired": true + } + }, + { + "name": "allow accepted task settlement release", + "principal": "agent://requester-alpha", + "capability": { + "resources": ["market://settlement/*"], + "actions": ["release"] + }, + "request": { + "resource": "market://settlement/t-016", + "action": "release", + "task": { + "id": "t-016", + "mode": "bounty", + "status": "accepted", + "rewardUsdc": 12, + "selectedWorker": "agent://worker-verified" + }, + "settlement": { + "recipient": "agent://worker-verified", + "receiptId": "settlement-receipt-011" + } + }, + "expected": { + "decision": "allow", + "reason": "ALLOW", + "nextStatus": "settled", + "evidenceRequired": true + } + } + ] +} diff --git a/packages/conformance-tests/package.json b/packages/conformance-tests/package.json index 395b302e..38bb6502 100644 --- a/packages/conformance-tests/package.json +++ b/packages/conformance-tests/package.json @@ -7,7 +7,7 @@ "build": "echo 'no build needed'", "test": "vitest run", "test:watch": "vitest watch", - "test:fixtures": "vitest run src/canonical-fixtures-conformance.test.ts src/a2a-fixtures-conformance.test.ts src/security-iot-fixtures-conformance.test.ts src/economy-fixtures-conformance.test.ts src/autogen-interop-conformance.test.ts src/agentskill-authz-fixtures-conformance.test.ts src/action-ref-explainability-conformance.test.ts 
src/payment-governance-fixtures-conformance.test.ts src/physical-ai-runtime-safety-fixtures-conformance.test.ts src/post-quantum-crypto-agility-conformance.test.ts src/humanoid-profile-conformance.test.ts src/humanoid-warehouse-pilot-conformance.test.ts src/eu-ai-act-conformity-pack-conformance.test.ts src/humanoid-multivendor-fleet-conformance.test.ts src/open-rmf-handoff-policy-receipts-conformance.test.ts src/moveit-manipulation-policy-receipts-conformance.test.ts src/nav2-navigation-policy-receipts-conformance.test.ts src/px4-offboard-policy-receipts-conformance.test.ts src/lerobot-policy-actuation-receipts-conformance.test.ts src/solar-field-operations-policy-receipts-conformance.test.ts src/industrial-cell-safety-pack-conformance.test.ts src/factory-action-demo-conformance.test.ts src/sint-industrial-pack-conformance.test.ts src/regulated-consent-extensions-conformance.test.ts src/autonomy-supervisor-conformance.test.ts", + "test:fixtures": "vitest run src/canonical-fixtures-conformance.test.ts src/a2a-fixtures-conformance.test.ts src/security-iot-fixtures-conformance.test.ts src/economy-fixtures-conformance.test.ts src/autogen-interop-conformance.test.ts src/agentskill-authz-fixtures-conformance.test.ts src/action-ref-explainability-conformance.test.ts src/payment-governance-fixtures-conformance.test.ts src/agent-commerce-governance-conformance.test.ts src/physical-ai-runtime-safety-fixtures-conformance.test.ts src/post-quantum-crypto-agility-conformance.test.ts src/humanoid-profile-conformance.test.ts src/humanoid-warehouse-pilot-conformance.test.ts src/eu-ai-act-conformity-pack-conformance.test.ts src/humanoid-multivendor-fleet-conformance.test.ts src/open-rmf-handoff-policy-receipts-conformance.test.ts src/moveit-manipulation-policy-receipts-conformance.test.ts src/nav2-navigation-policy-receipts-conformance.test.ts src/px4-offboard-policy-receipts-conformance.test.ts src/lerobot-policy-actuation-receipts-conformance.test.ts 
src/solar-field-operations-policy-receipts-conformance.test.ts src/industrial-cell-safety-pack-conformance.test.ts src/factory-action-demo-conformance.test.ts src/sint-industrial-pack-conformance.test.ts src/regulated-consent-extensions-conformance.test.ts src/autonomy-supervisor-conformance.test.ts", "test:physical-ai-runtime": "vitest run src/physical-ai-runtime-safety-fixtures-conformance.test.ts", "test:factory-action": "vitest run src/factory-action-demo-conformance.test.ts", "test:ros2-loop": "vitest run src/ros2-control-loop-latency.test.ts" diff --git a/packages/conformance-tests/src/agent-commerce-governance-conformance.test.ts b/packages/conformance-tests/src/agent-commerce-governance-conformance.test.ts new file mode 100644 index 00000000..da305f1a --- /dev/null +++ b/packages/conformance-tests/src/agent-commerce-governance-conformance.test.ts @@ -0,0 +1,283 @@ +/** + * Agent commerce governance fixture conformance. + * + * Covers task-market and x402-style pre-action controls: + * - registered agent identity + * - scoped market capabilities + * - task-mode state transitions + * - reputation gates for exclusive claims + * - auction bid validity + * - benchmark proof evidence + * - x402 permit cap and expiry + * - settlement state and receipt replay checks + */ + +import { describe, expect, it } from "vitest"; +import { + loadAgentCommerceGovernanceFixture, + type AgentCommerceDecision, + type AgentCommerceDecisionReason, + type AgentCommerceGovernanceCase, + type AgentCommerceGovernanceFixture, +} from "./fixture-loader.js"; + +interface AgentCommerceDecisionResult { + readonly decision: AgentCommerceDecision; + readonly reason: AgentCommerceDecisionReason; + readonly requiredTier?: string; + readonly nextStatus?: string; + readonly evidenceRequired: boolean; +} + +class AgentCommerceGovernanceHarness { + private readonly registeredAgents: Set<string>; + private readonly allowedRecipients: Set<string>; + private readonly usedSettlementReceiptIds: Set<string>; + private readonly 
referenceTimeMs: number; + + constructor(private readonly fixture: AgentCommerceGovernanceFixture) { + this.registeredAgents = new Set(fixture.defaults.registeredAgents); + this.allowedRecipients = new Set(fixture.defaults.allowedRecipients); + this.usedSettlementReceiptIds = new Set(fixture.defaults.usedSettlementReceiptIds); + this.referenceTimeMs = Date.parse(fixture.defaults.referenceTime); + } + + evaluate(scenario: AgentCommerceGovernanceCase): AgentCommerceDecisionResult { + if (!this.registeredAgents.has(scenario.principal)) { + return this.deny("AGENT_IDENTITY_REQUIRED"); + } + + if (!this.hasScope(scenario)) { + return this.deny("SCOPE_NOT_AUTHORIZED"); + } + + const { request } = scenario; + + if (request.x402) { + return this.evaluateX402(request.x402); + } + + if (request.settlement) { + return this.evaluateSettlement(scenario); + } + + const task = request.task; + if (!task) { + return this.deny("STATE_TRANSITION_INVALID"); + } + + if (request.action === "create") { + if (!this.allowedRecipients.has(task.recipient ?? "")) { + return this.deny("RECIPIENT_NOT_ALLOWLISTED"); + } + if (task.rewardUsdc >= this.fixture.defaults.highValueApprovalThresholdUsdc) { + return { + decision: "escalate", + reason: "VALUE_REQUIRES_APPROVAL", + requiredTier: "T3_COMMIT", + nextStatus: "open", + evidenceRequired: true, + }; + } + return this.allow("open"); + } + + if (request.action === "claim") { + if (task.status !== "open" || task.mode !== "claim") { + return this.deny("STATE_TRANSITION_INVALID"); + } + const reputation = this.fixture.defaults.reputationByAgent[scenario.principal] ?? 
0; + if (reputation < this.fixture.defaults.minReputationScore) { + return this.deny("REPUTATION_BELOW_THRESHOLD"); + } + return this.allow("claimed"); + } + + if (request.action === "pitch") { + if (task.status !== "open" || task.mode !== "pitch") { + return this.deny("STATE_TRANSITION_INVALID"); + } + return this.allow("open"); + } + + if (request.action === "select_worker") { + if (task.status !== "open" || task.mode !== "pitch" || !task.selectedWorker) { + return this.deny("STATE_TRANSITION_INVALID"); + } + return { + decision: "allow", + reason: "ALLOW", + nextStatus: "worker_selected", + evidenceRequired: true, + }; + } + + if (request.action === "bid") { + if (task.status !== "open" || task.mode !== "auction") { + return this.deny("STATE_TRANSITION_INVALID"); + } + if (task.auctionType === "english" && request.bid) { + const currentLowest = task.currentLowestBidUsdc ?? task.maxPriceUsdc ?? Infinity; + if (request.bid.priceUsdc >= currentLowest) { + return this.deny("BID_NOT_COMPETITIVE"); + } + } + return this.allow(); + } + + if (request.action === "submit_proof") { + if (task.mode !== "benchmark" || task.status !== "open") { + return this.deny("STATE_TRANSITION_INVALID"); + } + if (!request.proof?.digest || request.proof.metricValue === undefined) { + return this.deny("PROOF_REQUIRED"); + } + return this.allow("pending_approval"); + } + + if (request.action === "accept") { + if (task.status !== "pending_approval" || !task.selectedWorker) { + return this.deny("STATE_TRANSITION_INVALID"); + } + if (task.rewardUsdc >= this.fixture.defaults.highValueApprovalThresholdUsdc) { + return { + decision: "escalate", + reason: "VALUE_REQUIRES_APPROVAL", + requiredTier: "T3_COMMIT", + nextStatus: "accepted", + evidenceRequired: true, + }; + } + return { + decision: "allow", + reason: "ALLOW", + nextStatus: "accepted", + evidenceRequired: true, + }; + } + + return this.deny("STATE_TRANSITION_INVALID"); + } + + private evaluateX402( + x402: NonNullable<AgentCommerceGovernanceCase["request"]["x402"]>, + ): 
AgentCommerceDecisionResult { + if (!this.allowedRecipients.has(x402.recipient)) { + return this.deny("RECIPIENT_NOT_ALLOWLISTED"); + } + if (x402.permitCapUsdc > this.fixture.defaults.maxX402SessionCapUsdc) { + return this.deny("X402_CAP_EXCEEDED"); + } + + const expiresAtMs = Date.parse(x402.expiresAt); + const maxExpiryMs = + this.referenceTimeMs + this.fixture.defaults.maxPermitExpiryMinutes * 60_000; + + if (expiresAtMs <= this.referenceTimeMs || expiresAtMs > maxExpiryMs) { + return this.deny("X402_PERMIT_EXPIRED"); + } + + return this.allow(); + } + + private evaluateSettlement(scenario: AgentCommerceGovernanceCase): AgentCommerceDecisionResult { + const { task, settlement } = scenario.request; + if (!task || !settlement) { + return this.deny("SETTLEMENT_STATE_INVALID"); + } + if (task.status !== "accepted") { + return this.deny("SETTLEMENT_STATE_INVALID"); + } + if (!this.allowedRecipients.has(settlement.recipient)) { + return this.deny("RECIPIENT_NOT_ALLOWLISTED"); + } + if (this.usedSettlementReceiptIds.has(settlement.receiptId)) { + return this.deny("RECEIPT_REPLAY"); + } + this.usedSettlementReceiptIds.add(settlement.receiptId); + return { + decision: "allow", + reason: "ALLOW", + nextStatus: "settled", + evidenceRequired: true, + }; + } + + private hasScope(scenario: AgentCommerceGovernanceCase): boolean { + const { capability, request } = scenario; + return ( + capability.actions.includes(request.action) && + capability.resources.some((resource) => resourceMatches(resource, request.resource)) + ); + } + + private allow(nextStatus?: string): AgentCommerceDecisionResult { + return { + decision: "allow", + reason: "ALLOW", + nextStatus, + evidenceRequired: false, + }; + } + + private deny(reason: AgentCommerceDecisionReason): AgentCommerceDecisionResult { + return { + decision: "deny", + reason, + evidenceRequired: true, + }; + } +} + +function resourceMatches(pattern: string, resource: string): boolean { + if (pattern.endsWith("*")) { + return 
resource.startsWith(pattern.slice(0, -1)); + } + return pattern === resource; +} + +describe("Agent Commerce Governance Fixture Conformance", () => { + const fixture = loadAgentCommerceGovernanceFixture(); + + it("fixture exposes the v1 market and payment control vocabulary", () => { + expect(fixture.fixtureId).toBe("sint.economy.agent-commerce-governance.v1"); + expect(fixture.profile.resources).toEqual([ + "market://task/*", + "market://settlement/*", + "x402://session/*", + ]); + expect(fixture.profile.taskModes).toEqual([ + "bounty", + "claim", + "pitch", + "benchmark", + "auction", + ]); + expect(fixture.profile.actions).toContain("authorize_permit"); + expect(fixture.profile.actions).toContain("release"); + expect(fixture.profile.decisionReasons).toContain("X402_CAP_EXCEEDED"); + expect(fixture.profile.decisionReasons).toContain("BID_NOT_COMPETITIVE"); + expect(fixture.cases.length).toBeGreaterThanOrEqual(10); + }); + + it("keeps every case tied to a stable decision reason and evidence rule", () => { + for (const scenario of fixture.cases) { + expect(fixture.profile.decisionReasons).toContain(scenario.expected.reason); + if (scenario.expected.decision !== "allow" || scenario.request.action === "release") { + expect(scenario.expected.evidenceRequired).toBe(true); + } + if (scenario.request.action === "submit_proof" && scenario.request.task?.mode === "benchmark") { + expect(scenario.request.proof?.type).toBe("benchmark"); + } + } + }); + + it("evaluates task-market and x402 controls deterministically", () => { + const harness = new AgentCommerceGovernanceHarness(fixture); + + for (const scenario of fixture.cases) { + const result = harness.evaluate(scenario); + expect(result).toEqual(scenario.expected); + } + }); +}); diff --git a/packages/conformance-tests/src/fixture-loader.ts b/packages/conformance-tests/src/fixture-loader.ts index 3120f4f9..119d504a 100644 --- a/packages/conformance-tests/src/fixture-loader.ts +++ 
b/packages/conformance-tests/src/fixture-loader.ts @@ -590,6 +590,108 @@ export interface PaymentGovernanceFixture { }>; } +export interface AgentCommerceGovernanceFixture { + readonly fixtureId: string; + readonly schemaVersion: string; + readonly description: string; + readonly profile: { + readonly resources: readonly string[]; + readonly actions: readonly string[]; + readonly taskModes: readonly string[]; + readonly decisionReasons: readonly AgentCommerceDecisionReason[]; + readonly evidenceRequiredFor: readonly string[]; + }; + readonly defaults: { + readonly referenceTime: string; + readonly minReputationScore: number; + readonly highValueApprovalThresholdUsdc: number; + readonly maxX402SessionCapUsdc: number; + readonly maxPermitExpiryMinutes: number; + readonly allowedRecipients: readonly string[]; + readonly registeredAgents: readonly string[]; + readonly reputationByAgent: Record<string, number>; + readonly usedSettlementReceiptIds: readonly string[]; + }; + readonly cases: readonly AgentCommerceGovernanceCase[]; +} + +export type AgentCommerceDecision = "allow" | "deny" | "escalate"; + +export type AgentCommerceDecisionReason = + | "ALLOW" + | "AGENT_IDENTITY_REQUIRED" + | "SCOPE_NOT_AUTHORIZED" + | "REPUTATION_BELOW_THRESHOLD" + | "VALUE_REQUIRES_APPROVAL" + | "STATE_TRANSITION_INVALID" + | "BID_NOT_COMPETITIVE" + | "PROOF_REQUIRED" + | "X402_CAP_EXCEEDED" + | "X402_PERMIT_EXPIRED" + | "RECIPIENT_NOT_ALLOWLISTED" + | "SETTLEMENT_STATE_INVALID" + | "RECEIPT_REPLAY"; + +export interface AgentCommerceGovernanceCase { + readonly name: string; + readonly principal: string; + readonly capability: { + readonly resources: readonly string[]; + readonly actions: readonly string[]; + }; + readonly request: { + readonly resource: string; + readonly action: string; + readonly task?: { + readonly id: string; + readonly mode: "bounty" | "claim" | "pitch" | "benchmark" | "auction"; + readonly status: + | "draft" + | "open" + | "claimed" + | "worker_selected" + | "pending_approval" + | 
"accepted" + | "settled"; + readonly rewardUsdc: number; + readonly recipient?: string; + readonly stakeRequiredUsdc?: number; + readonly auctionType?: "english" | "reverse_english" | "dutch" | "reverse_dutch"; + readonly maxPriceUsdc?: number; + readonly currentLowestBidUsdc?: number; + readonly selectedWorker?: string; + readonly metricDescription?: string; + readonly metricTarget?: string; + }; + readonly bid?: { + readonly priceUsdc: number; + }; + readonly proof?: { + readonly type: string; + readonly metricName?: string; + readonly metricValue?: number; + readonly digest?: string; + }; + readonly x402?: { + readonly sessionId: string; + readonly recipient: string; + readonly permitCapUsdc: number; + readonly expiresAt: string; + }; + readonly settlement?: { + readonly recipient: string; + readonly receiptId: string; + }; + }; + readonly expected: { + readonly decision: AgentCommerceDecision; + readonly reason: AgentCommerceDecisionReason; + readonly requiredTier?: string; + readonly nextStatus?: string; + readonly evidenceRequired: boolean; + }; +} + function loadFixture<T>(relativePath: string): T { const path = resolve(FIXTURE_ROOT, relativePath); const raw = readFileSync(path, "utf8"); @@ -692,6 +794,12 @@ export function loadPaymentGovernanceFixture(): PaymentGovernanceFixture { ); } +export function loadAgentCommerceGovernanceFixture(): AgentCommerceGovernanceFixture { + return loadFixture<AgentCommerceGovernanceFixture>( + "economy/agent-commerce-governance.v1.json", + ); +} + export interface APSSINTHandshakeCase { readonly name: string; readonly scenario: "A" | "B" | "C";