diff --git a/lib/agent-errors.cjs b/lib/agent-errors.cjs new file mode 100644 index 0000000..26f9ee0 --- /dev/null +++ b/lib/agent-errors.cjs @@ -0,0 +1,384 @@ +'use strict'; + +/** + * lib/agent-errors.cjs — Agent failure taxonomy for GSD agents + * + * Defines a formal taxonomy of GSD agent failure modes with structured + * error codes, severity levels, and recommended recovery actions. + * + * Failure types: + * timeout — agent exceeded turn limit + * malformed-output — agent returned unparseable result + * partial-completion — agent completed some but not all tasks + * hallucination — agent claimed success but artifacts missing + * permission-denied — agent blocked by hook or sandbox + * + * Each failure type carries: + * - code — machine-readable error code (AGENT_ERR_*) + * - name — human-readable failure name + * - severity — critical | high | medium | low + * - description — what happened + * - recovery — recommended recovery action + * - retryable — whether automatic retry is safe + * + * Integrates with: + * - lib/errors.cjs (MgwError base class) + * - lib/retry.cjs (pipeline retry infrastructure) + */ + +const { MgwError } = require('./errors.cjs'); + +// --------------------------------------------------------------------------- +// Severity levels (ordered: critical > high > medium > low) +// --------------------------------------------------------------------------- + +/** + * Ordered severity levels for agent failures. + * Higher numeric weight = more severe. 
/**
 * Ordered severity levels for agent failures.
 * Higher numeric weight = more severe.
 *
 * Both the table and every descriptor are frozen so the shared constants
 * cannot be modified by consumers.
 */
const SEVERITY_LEVELS = Object.freeze({
  critical: Object.freeze({ name: 'critical', weight: 4, description: 'Agent produced dangerous or misleading results' }),
  high: Object.freeze({ name: 'high', weight: 3, description: 'Agent failed to produce usable output' }),
  medium: Object.freeze({ name: 'medium', weight: 2, description: 'Agent partially succeeded but gaps remain' }),
  low: Object.freeze({ name: 'low', weight: 1, description: 'Minor issue, likely recoverable automatically' }),
});

// ---------------------------------------------------------------------------
// Agent failure type definitions
// ---------------------------------------------------------------------------

/**
 * The five canonical agent failure types.
 *
 * Each entry is a frozen descriptor with:
 *   code, name, severity, description, recovery, retryable
 */
const AGENT_FAILURE_TYPES = Object.freeze({
  timeout: Object.freeze({
    code: 'AGENT_ERR_TIMEOUT',
    name: 'timeout',
    severity: 'high',
    description: 'Agent exceeded turn limit or wall-clock timeout without completing its task',
    recovery: 'Retry with reduced scope — split the task into smaller sub-tasks or increase the turn budget',
    retryable: true,
  }),

  'malformed-output': Object.freeze({
    code: 'AGENT_ERR_MALFORMED_OUTPUT',
    name: 'malformed-output',
    severity: 'high',
    description: 'Agent returned output that cannot be parsed or does not match the expected format',
    recovery: 'Retry with explicit format instructions — add structured output examples to the prompt',
    retryable: true,
  }),

  'partial-completion': Object.freeze({
    code: 'AGENT_ERR_PARTIAL_COMPLETION',
    name: 'partial-completion',
    severity: 'medium',
    description: 'Agent completed some but not all assigned tasks — partial artifacts exist',
    recovery: 'Spawn a continuation agent for the remaining tasks using the partial output as context',
    retryable: true,
  }),

  hallucination: Object.freeze({
    code: 'AGENT_ERR_HALLUCINATION',
    name: 'hallucination',
    severity: 'critical',
    description: 'Agent claimed success but required artifacts are missing or do not match expectations',
    recovery: 'Reject all results and retry with a verification-first approach — require artifact proof before completion',
    retryable: false,
  }),

  'permission-denied': Object.freeze({
    code: 'AGENT_ERR_PERMISSION_DENIED',
    name: 'permission-denied',
    severity: 'high',
    description: 'Agent was blocked by a pre-commit hook, sandbox restriction, or file permission',
    recovery: 'Escalate to human operator — review sandbox configuration and hook settings',
    retryable: false,
  }),
});

// ---------------------------------------------------------------------------
// Error code to failure type lookup (reverse map)
// ---------------------------------------------------------------------------

/**
 * Reverse index: error code (e.g. 'AGENT_ERR_TIMEOUT') -> frozen copy of the
 * failure descriptor extended with its `key` in AGENT_FAILURE_TYPES.
 *
 * @type {Map<string, object>}
 */
const _codeToType = new Map(
  Object.entries(AGENT_FAILURE_TYPES).map(([key, def]) => [def.code, Object.freeze({ key, ...def })])
);

/**
 * Resolve a failure type key to its descriptor.
 *
 * Only OWN keys of AGENT_FAILURE_TYPES are accepted: a plain property read
 * would also resolve names inherited from Object.prototype ('constructor',
 * 'toString', '__proto__', ...) and hand back an object that is not a
 * failure descriptor.
 *
 * @param {*} failureType - Candidate failure type key
 * @returns {object|null} Frozen descriptor, or null when the key is unknown
 */
function _resolveFailureDef(failureType) {
  if (typeof failureType !== 'string') return null;
  return Object.prototype.hasOwnProperty.call(AGENT_FAILURE_TYPES, failureType)
    ? AGENT_FAILURE_TYPES[failureType]
    : null;
}

// ---------------------------------------------------------------------------
// AgentFailureError class
// ---------------------------------------------------------------------------

/**
 * Error class for agent failures.
 * Extends MgwError with agent-specific context fields.
 */
class AgentFailureError extends MgwError {
  /**
   * @param {string} message - Human-readable error description
   * @param {object} [opts]
   * @param {string} [opts.agentType] - GSD agent type (gsd-planner, gsd-executor, etc.)
   * @param {string} [opts.failureType] - Failure type key from AGENT_FAILURE_TYPES
   * @param {string[]} [opts.artifacts] - List of expected artifacts that are missing/malformed
   * @param {string} [opts.stage] - Pipeline stage where failure occurred
   * @param {number} [opts.issueNumber] - Related GitHub issue number
   * @param {Error} [opts.cause] - Original error
   */
  constructor(message, opts) {
    const o = opts || {};
    const failureDef = _resolveFailureDef(o.failureType);
    // Unknown or missing failure types get the generic code.
    const code = failureDef ? failureDef.code : 'AGENT_ERR_UNKNOWN';

    super(message, { code, stage: o.stage, issueNumber: o.issueNumber, cause: o.cause });
    this.name = 'AgentFailureError';
    this.agentType = o.agentType || null;
    // The raw key is kept even when it is unknown so callers can inspect it.
    this.failureType = o.failureType || null;
    // Copy so later changes to the caller's array do not alter this error.
    this.artifacts = Array.isArray(o.artifacts) ? [...o.artifacts] : [];
  }

  /**
   * Get the full failure type definition for this error.
   * @returns {object|null} Descriptor from AGENT_FAILURE_TYPES, or null when
   *   failureType is unset or not a known key
   */
  getFailureDefinition() {
    return _resolveFailureDef(this.failureType);
  }

  /**
   * Get the severity level for this error.
   * @returns {string|null} Severity name, or null for unknown failure types
   */
  getSeverity() {
    const def = this.getFailureDefinition();
    return def ? def.severity : null;
  }

  /**
   * Check whether this failure type is safe to auto-retry.
   * @returns {boolean} false for unknown failure types
   */
  isRetryable() {
    const def = this.getFailureDefinition();
    return def ? def.retryable : false;
  }
}
// ---------------------------------------------------------------------------
// Classification function
// ---------------------------------------------------------------------------

/**
 * Error message patterns mapped to agent failure types.
 * Matched case-insensitively against error.message.
 *
 * Order matters — first match wins. The table below is flattened group by
 * group, top to bottom, into a list of `{ pattern, type }` entries; each
 * pattern is regular-expression source text.
 */
const CLASSIFICATION_PATTERNS = [
  ['timeout', [
    'turn limit',
    'max turns',
    'context window exhausted',
    'exceeded.*timeout',
    'agent timed out',
    'wall.?clock.*exceeded',
  ]],

  ['malformed-output', [
    'unparseable',
    'invalid json',
    'parse error',
    'unexpected token',
    'malformed output',
    'missing required field',
    'output format',
    'expected.*format',
  ]],

  ['partial-completion', [
    'partial completion',
    'incomplete.*tasks',
    'completed.*of.*tasks',
    'remaining tasks',
    'some tasks failed',
  ]],

  ['hallucination', [
    'artifacts? missing',
    'claimed success.*missing',
    'file.*not found.*after',
    'verification failed.*not exist',
    'hallucination',
    'phantom',
  ]],

  ['permission-denied', [
    'permission denied',
    'access denied',
    'hook.*failed',
    'pre.?commit.*rejected',
    'sandbox.*blocked',
    'sandbox.*violation',
    'eacces',
  ]],
].flatMap(([type, patterns]) => patterns.map((pattern) => ({ pattern, type })));
/**
 * Resolve a failure type key to its descriptor, accepting only OWN keys of
 * AGENT_FAILURE_TYPES. A plain property read would also match names inherited
 * from Object.prototype (e.g. 'constructor', 'toString').
 *
 * @param {*} failureType - Candidate failure type key
 * @returns {object|null} Descriptor, or null when the key is unknown
 */
function _findFailureType(failureType) {
  if (typeof failureType !== 'string') return null;
  return Object.prototype.hasOwnProperty.call(AGENT_FAILURE_TYPES, failureType)
    ? AGENT_FAILURE_TYPES[failureType]
    : null;
}

/** Lazily built cache of CLASSIFICATION_PATTERNS compiled to RegExp objects. */
let _compiledPatterns = null;

/**
 * Compile CLASSIFICATION_PATTERNS once, on first use, preserving table order.
 * The regexes carry only the `i` flag, so `.test()` keeps no state between calls.
 *
 * @returns {Array<{ regex: RegExp, type: string }>}
 */
function _getCompiledPatterns() {
  if (_compiledPatterns === null) {
    _compiledPatterns = CLASSIFICATION_PATTERNS.map(({ pattern, type }) => ({
      regex: new RegExp(pattern, 'i'),
      type,
    }));
  }
  return _compiledPatterns;
}

/**
 * Build the classification result for a known failure type.
 *
 * @param {string} type - Key of AGENT_FAILURE_TYPES
 * @param {'high'|'medium'|'low'} confidence
 * @returns {{ type: string, code: string, severity: string, confidence: string }}
 */
function _classification(type, confidence) {
  const def = AGENT_FAILURE_TYPES[type];
  return { type, code: def.code, severity: def.severity, confidence };
}

/**
 * Classify a raw error into an agent failure type.
 *
 * Checks, in order:
 *   1. context: expected artifacts missing while tasksCompleted > 0 -> hallucination (high)
 *   2. context: 0 < tasksCompleted < tasksTotal -> partial-completion (high)
 *   3. error.code 'EACCES' / 'EPERM' (any case) -> permission-denied (high)
 *   4. first CLASSIFICATION_PATTERNS entry matching error.message -> its type (medium)
 *
 * Returns null if nothing matches (caller should use lib/retry.cjs for
 * generic classification in that case).
 *
 * @param {object} error - Error object with at least a `message` field
 * @param {string} [error.message] - Error message to classify
 * @param {string|number} [error.code] - Error code (e.g. 'EACCES'); non-string
 *   values such as numeric exit codes are stringified instead of throwing
 * @param {object} [context] - Optional context for richer classification
 * @param {string} [context.agentType] - GSD agent type that failed (not consulted by this function)
 * @param {string[]} [context.expectedArtifacts] - Artifacts the agent was expected to produce
 * @param {string[]} [context.actualArtifacts] - Artifacts the agent actually produced
 * @param {number} [context.tasksTotal] - Total number of tasks assigned
 * @param {number} [context.tasksCompleted] - Number of tasks completed
 * @returns {{ type: string, code: string, severity: string, confidence: 'high'|'medium'|'low' }|null}
 */
function classifyAgentFailure(error, context) {
  if (!error || typeof error !== 'object') return null;

  // message/code come from arbitrary thrown values; stringify before
  // lower-casing so a numeric code or message does not raise a TypeError.
  const message = String(error.message || '').toLowerCase();
  const code = String(error.code || '').toLowerCase();
  const ctx = context || {};

  // --- Context-based classification (higher confidence) ---

  // Artifact mismatch → hallucination (only when the agent reported completed work)
  if (ctx.expectedArtifacts && ctx.actualArtifacts) {
    const produced = new Set(ctx.actualArtifacts);
    const missing = [...new Set(ctx.expectedArtifacts)].filter((artifact) => !produced.has(artifact));
    if (missing.length > 0 && ctx.tasksCompleted > 0) {
      return _classification('hallucination', 'high');
    }
  }

  // Partial task completion
  if (typeof ctx.tasksTotal === 'number' && typeof ctx.tasksCompleted === 'number') {
    if (ctx.tasksCompleted > 0 && ctx.tasksCompleted < ctx.tasksTotal) {
      return _classification('partial-completion', 'high');
    }
  }

  // --- Error code classification ---

  if (code === 'eacces' || code === 'eperm') {
    return _classification('permission-denied', 'high');
  }

  // --- Message pattern classification (first match wins) ---

  for (const { regex, type } of _getCompiledPatterns()) {
    if (regex.test(message)) {
      return _classification(type, 'medium');
    }
  }

  // No match — return null so caller can fall back to generic retry classification
  return null;
}

// ---------------------------------------------------------------------------
// Recovery action lookup
// ---------------------------------------------------------------------------

/**
 * Get the recommended recovery action for a failure type.
 *
 * @param {string} failureType - Failure type key (e.g. 'timeout', 'hallucination')
 * @returns {{ action: string, retryable: boolean, severity: string }|null}
 *   null when failureType is not a known key
 */
function getRecoveryAction(failureType) {
  const def = _findFailureType(failureType);
  if (!def) return null;

  return {
    action: def.recovery,
    retryable: def.retryable,
    severity: def.severity,
  };
}

// ---------------------------------------------------------------------------
// Retry eligibility check
// ---------------------------------------------------------------------------

/**
 * Check whether a failure type is safe for automatic retry.
 *
 * @param {string} failureType - Failure type key
 * @returns {boolean} false for unknown failure types
 */
function isRetryable(failureType) {
  const def = _findFailureType(failureType);
  return def ? def.retryable : false;
}
// ---------------------------------------------------------------------------
// Severity comparison utility
// ---------------------------------------------------------------------------

/**
 * Numeric weight of a severity level name.
 *
 * Only OWN keys of SEVERITY_LEVELS count: an inherited name such as
 * 'constructor' would otherwise resolve to an object without a numeric
 * `weight` and turn the comparison into NaN.
 *
 * @param {*} level - Severity level name
 * @returns {number} The level's weight, or 0 for unknown levels
 */
function _severityWeight(level) {
  if (typeof level !== 'string' || !Object.prototype.hasOwnProperty.call(SEVERITY_LEVELS, level)) {
    return 0;
  }
  const { weight } = SEVERITY_LEVELS[level];
  return typeof weight === 'number' ? weight : 0;
}

/**
 * Compare two severity levels.
 * Returns positive if a is more severe, negative if b is more severe, 0 if equal.
 * Unknown level names weigh 0, i.e. below every defined level.
 *
 * @param {string} a - Severity level name
 * @param {string} b - Severity level name
 * @returns {number} Difference of the two weights (never NaN)
 */
function compareSeverity(a, b) {
  return _severityWeight(a) - _severityWeight(b);
}

// ---------------------------------------------------------------------------
// Failure type lookup by error code
// ---------------------------------------------------------------------------

/**
 * Look up a failure type definition by its error code.
 *
 * @param {string} errorCode - Error code (e.g. 'AGENT_ERR_TIMEOUT')
 * @returns {object|null} Entry stored in the code-to-type map, or null when
 *   the code is unknown
 */
function getFailureByCode(errorCode) {
  return _codeToType.get(errorCode) || null;
}
+ * + * Default retry policies: + * timeout: 2 retries (agent exceeded turn limit — often recoverable) + * malformed-output: 1 retry (may succeed with same prompt) + * partial-completion: 1 retry (continuation may complete remaining tasks) + * hallucination: 0 retries (unsafe to retry — results are unreliable) + * permission-denied: 0 retries (requires human intervention) + * + * Integrates with: + * - lib/agent-errors.cjs (agent failure taxonomy & classification) + * - lib/retry.cjs (generic failure classification fallback) + * - lib/errors.cjs (MgwError base class) + */ + +const fs = require('fs'); +const path = require('path'); +const { MgwError } = require('./errors.cjs'); +const { classifyFailure, getBackoffMs: genericBackoff } = require('./retry.cjs'); + +// --------------------------------------------------------------------------- +// Safe import of agent-errors.cjs (may not exist if PR #229 not merged) +// --------------------------------------------------------------------------- + +let classifyAgentFailure = null; +let AGENT_FAILURE_TYPES = null; +try { + const agentErrors = require('./agent-errors.cjs'); + classifyAgentFailure = agentErrors.classifyAgentFailure; + AGENT_FAILURE_TYPES = agentErrors.AGENT_FAILURE_TYPES; +} catch (_) { + // agent-errors.cjs not available — fall back to generic classification only +} + +// --------------------------------------------------------------------------- +// Default policy constants +// --------------------------------------------------------------------------- + +/** + * Default maximum retry counts per agent failure type. + * + * These represent the number of RETRY attempts (not total attempts). + * Total attempts = maxRetries + 1 (the initial attempt). + */ +const DEFAULT_RETRY_POLICIES = Object.freeze({ + 'timeout': 2, + 'malformed-output': 1, + 'partial-completion': 1, + 'hallucination': 0, + 'permission-denied': 0, +}); + +/** + * Default backoff configuration for retry delays. 
/**
 * Default backoff configuration for retry delays.
 *
 * Uses exponential backoff: delay = min(maxMs, baseMs * multiplier^attempt)
 * With optional full jitter: delay = random(0, computed_delay)
 */
const DEFAULT_BACKOFF_CONFIG = Object.freeze({
  baseMs: 5 * 1000, // first retry waits up to 5 s
  maxMs: 5 * 60 * 1000, // delays are capped at 5 min
  multiplier: 2,
  jitter: true,
});

// ---------------------------------------------------------------------------
// RetryPolicyError class
// ---------------------------------------------------------------------------

/**
 * Error thrown when retry policy is exhausted or a non-retryable failure occurs.
 * Wraps the original error with retry context. Always carries the code
 * 'RETRY_POLICY_EXHAUSTED'.
 */
class RetryPolicyError extends MgwError {
  /**
   * @param {string} message
   * @param {object} [opts]
   * @param {string} [opts.failureType] - Classified failure type
   * @param {string} [opts.agentType] - GSD agent type
   * @param {number} [opts.attempts] - Total attempts made
   * @param {number} [opts.maxRetries] - Max retries allowed for this failure type
   * @param {string} [opts.stage] - Forwarded to MgwError
   * @param {number} [opts.issueNumber] - Forwarded to MgwError
   * @param {Error} [opts.cause] - Original error
   */
  constructor(message, opts) {
    const details = opts || {};
    const { stage, issueNumber, cause } = details;

    super(message, { code: 'RETRY_POLICY_EXHAUSTED', stage, issueNumber, cause });

    // Counters fall back to 0 unless a real number was supplied.
    const numberOrZero = (value) => (typeof value === 'number' ? value : 0);

    this.name = 'RetryPolicyError';
    this.failureType = details.failureType || null;
    this.agentType = details.agentType || null;
    this.attempts = numberOrZero(details.attempts);
    this.maxRetries = numberOrZero(details.maxRetries);
  }
}
/**
 * Configurable retry policy engine for GSD agent failures.
 *
 * Provides per-failure-type retry limits with per-agent-type overrides.
 * Configuration is loaded from .mgw/config.json if present.
 *
 * Merge priority (right wins):
 *   DEFAULT_RETRY_POLICIES < config file < constructor opts
 */
class RetryPolicyEngine {
  /**
   * @param {object} [opts]
   * @param {object} [opts.policies] - Per-failure-type max retries override
   * @param {object} [opts.backoff] - Backoff config override
   * @param {object} [opts.agentOverrides] - Per-agent-type policy overrides
   *   Format: { 'gsd-planner': { timeout: 3 }, 'gsd-executor': { timeout: 1 } }
   * @param {string} [opts.configPath] - Path to .mgw/config.json (default: auto-detect)
   */
  constructor(opts) {
    const o = opts || {};

    // Load config from file if available
    const fileConfig = RetryPolicyEngine.loadConfig(o.configPath || null);

    // Merge policies: defaults < file config < constructor opts
    this._policies = Object.assign(
      {},
      DEFAULT_RETRY_POLICIES,
      fileConfig.policies || {},
      o.policies || {}
    );

    // Merge backoff: defaults < file config < constructor opts
    this._backoff = Object.assign(
      {},
      DEFAULT_BACKOFF_CONFIG,
      fileConfig.backoff || {},
      o.backoff || {}
    );

    // Merge agent overrides: file config < constructor opts, key by key per
    // agent, so a constructor override for one failure type does not discard
    // the config file's overrides for that agent's other failure types.
    this._agentOverrides = RetryPolicyEngine._mergeAgentOverrides(
      fileConfig.agentOverrides,
      o.agentOverrides
    );
  }

  // -------------------------------------------------------------------------
  // Internal helpers
  // -------------------------------------------------------------------------

  /**
   * Merge per-agent override maps, later sources winning per failure type.
   *
   * @param {...(object|undefined)} sources - Maps of agentType -> { failureType: maxRetries }
   * @returns {object} Prototype-less map of agentType -> merged overrides
   * @private
   */
  static _mergeAgentOverrides(...sources) {
    // No prototype, so an agent type named like '__proto__' is stored as
    // ordinary data.
    const merged = Object.create(null);
    for (const source of sources) {
      if (!source || typeof source !== 'object') continue;
      for (const [agentType, overrides] of Object.entries(source)) {
        if (!overrides || typeof overrides !== 'object') continue;
        merged[agentType] = Object.assign({}, merged[agentType], overrides);
      }
    }
    return merged;
  }

  /**
   * Read a retry count stored under an OWN key of `table`.
   *
   * @param {*} table - Policy table
   * @param {string} key - Failure type
   * @returns {number|null} Non-negative integer count (fractions are floored,
   *   negatives become 0), or null when the key is absent or its value is not
   *   a usable number
   * @private
   */
  static _readRetryCount(table, key) {
    if (!table || typeof table !== 'object') return null;
    if (!Object.prototype.hasOwnProperty.call(table, key)) return null;
    const value = table[key];
    if (typeof value !== 'number' || Number.isNaN(value)) return null;
    return Math.max(0, Math.floor(value));
  }

  /**
   * Wait `ms` milliseconds, or reject early with `abortReason` when `signal`
   * aborts. The abort listener is removed once the timer fires, so repeated
   * backoffs do not accumulate listeners on a long-lived signal.
   *
   * @param {number} ms - Delay in milliseconds
   * @param {AbortSignal|null} signal - Optional abort signal
   * @param {*} abortReason - Rejection value used on abort
   * @returns {Promise<void>}
   * @private
   */
  static _sleep(ms, signal, abortReason) {
    return new Promise((resolve, reject) => {
      // An already-aborted signal never emits 'abort' again; reject at once
      // instead of sleeping through the whole delay.
      if (signal && signal.aborted) {
        reject(abortReason);
        return;
      }

      let timer = null;
      const onAbort = () => {
        clearTimeout(timer);
        reject(abortReason);
      };

      timer = setTimeout(() => {
        if (signal) signal.removeEventListener('abort', onAbort);
        resolve();
      }, ms);

      if (signal) signal.addEventListener('abort', onAbort, { once: true });
    });
  }

  // -------------------------------------------------------------------------
  // Policy query methods
  // -------------------------------------------------------------------------

  /**
   * Get the maximum retry count for a failure type, with optional
   * per-agent-type override.
   *
   * Lookup order:
   *   1. agentOverrides[agentType][failureType] (if agentType provided)
   *   2. policies[failureType]
   *   3. 0 (unknown failure types are never retried)
   *
   * Only own, numeric (non-NaN) entries are honoured at each step.
   *
   * @param {string} failureType - Agent failure type (e.g. 'timeout')
   * @param {string} [agentType] - GSD agent type (e.g. 'gsd-planner')
   * @returns {number} Max retry count (0 means no retries)
   */
  getMaxRetries(failureType, agentType) {
    // Check agent-specific override first
    if (agentType && Object.prototype.hasOwnProperty.call(this._agentOverrides, agentType)) {
      const override = RetryPolicyEngine._readRetryCount(this._agentOverrides[agentType], failureType);
      if (override !== null) return override;
    }

    // Fall back to global policy
    const globalCount = RetryPolicyEngine._readRetryCount(this._policies, failureType);
    if (globalCount !== null) return globalCount;

    // Unknown failure type — never retry
    return 0;
  }

  /**
   * Determine whether a retry should be attempted for a given failure type,
   * agent type, and current attempt count.
   *
   * @param {string} failureType - Agent failure type
   * @param {string} [agentType] - GSD agent type
   * @param {number} currentAttempt - Number of retries already attempted (0-based)
   * @returns {boolean} True if another retry is allowed
   */
  shouldRetry(failureType, agentType, currentAttempt) {
    const maxRetries = this.getMaxRetries(failureType, agentType);
    const attempt = typeof currentAttempt === 'number' ? currentAttempt : 0;
    return attempt < maxRetries;
  }

  /**
   * Calculate backoff delay in milliseconds for a given attempt number.
   *
   * Uses exponential backoff: delay = min(maxMs, baseMs * multiplier^attempt)
   * With optional full jitter: delay = random(0, computed_delay)
   *
   * An invalid attempt number counts as attempt 0. A backoff configuration
   * that does not evaluate to a number yields 0, and the ceiling is capped at
   * 2147483647 ms, the largest delay setTimeout honours.
   *
   * @param {number} attempt - Retry attempt number (0-based)
   * @returns {number} Delay in milliseconds (integer, non-negative)
   */
  getBackoffMs(attempt) {
    const MAX_TIMER_DELAY_MS = 2147483647;
    const { baseMs, maxMs, multiplier, jitter } = this._backoff;

    const floored = Math.floor(attempt);
    const exponent = Number.isNaN(floored) ? 0 : Math.max(0, floored);

    const raw = Math.min(maxMs, baseMs * Math.pow(multiplier, exponent));
    const ceiling = Number.isNaN(raw) ? 0 : Math.min(Math.max(0, raw), MAX_TIMER_DELAY_MS);

    if (jitter) {
      // Full jitter: uniform random in [0, ceiling]
      return Math.min(Math.floor(Math.random() * (ceiling + 1)), MAX_TIMER_DELAY_MS);
    }

    return Math.floor(ceiling);
  }

  // -------------------------------------------------------------------------
  // Execution wrapper
  // -------------------------------------------------------------------------

  /**
   * Execute an async function with retry policy.
   *
   * Wraps a Task() call (or any async function) with automatic retry
   * based on the configured policy. On failure:
   *
   *   1. Classify using classifyAgentFailure (from agent-errors.cjs)
   *   2. Fall back to classifyFailure (from retry.cjs) if no agent match
   *   3. Check shouldRetry with the classified type
   *   4. If retryable and attempts remain: wait backoff, retry
   *   5. If not retryable or exhausted: throw original error
   *
   * If the signal aborts, the promise rejects with the most recent error from
   * fn(), or with an MgwError (code RETRY_ABORTED) when fn() never ran.
   *
   * @param {() => Promise<*>} fn - Async function to execute
   * @param {object} [opts]
   * @param {string} [opts.agentType] - GSD agent type for override lookup
   * @param {function} [opts.onRetry] - Callback on retry: (attempt, failureType, backoffMs) => void
   * @param {AbortSignal} [opts.signal] - AbortSignal to cancel retries
   * @returns {Promise<*>} Result of fn()
   * @throws {Error} Original error if all retries exhausted or failure is non-retryable
   */
  async executeWithPolicy(fn, opts) {
    const o = opts || {};
    const agentType = o.agentType || null;
    const onRetry = typeof o.onRetry === 'function' ? o.onRetry : null;
    const signal = o.signal || null;

    let lastError;
    let attempt = 0;

    // eslint-disable-next-line no-constant-condition
    while (true) {
      // Check for abort signal
      if (signal && signal.aborted) {
        throw lastError || new MgwError('Retry aborted by signal', { code: 'RETRY_ABORTED' });
      }

      try {
        return await fn();
      } catch (err) {
        lastError = err;

        // Classify the failure
        const failureType = this._classifyError(err, agentType);

        // Not retryable or retries exhausted — throw original error
        if (!this.shouldRetry(failureType, agentType, attempt)) {
          throw err;
        }

        const backoffMs = this.getBackoffMs(attempt);

        // Notify caller of retry
        if (onRetry) {
          try {
            onRetry(attempt, failureType, backoffMs);
          } catch (_) {
            // onRetry errors are non-fatal
          }
        }

        // Wait for backoff; an abort during the wait rejects with lastError
        if (backoffMs > 0) {
          await RetryPolicyEngine._sleep(backoffMs, signal, lastError);
        }

        attempt++;
      }
    }
  }

  // -------------------------------------------------------------------------
  // Internal: error classification
  // -------------------------------------------------------------------------

  /**
   * Classify an error into an agent failure type string.
   *
   * Uses agent-errors.cjs classification first, then falls back to
   * generic retry.cjs classification.
   *
   * @param {Error} err - The error to classify
   * @param {string} [agentType] - GSD agent type for context
   * @returns {string} Failure type key (e.g. 'timeout', 'permanent', 'needs-info')
   * @private
   */
  _classifyError(err, agentType) {
    // Try agent-specific classification first
    if (classifyAgentFailure) {
      const agentResult = classifyAgentFailure(err, { agentType });
      if (agentResult && agentResult.type) {
        return agentResult.type;
      }
    }

    // Fall back to generic classification
    const genericResult = classifyFailure(err);
    if (genericResult && genericResult.class) {
      // Map generic classes to policy keys
      // 'transient' from retry.cjs maps to 'timeout' in our policy
      // (generic transient errors get the timeout retry budget)
      if (genericResult.class === 'transient') return 'timeout';
      if (genericResult.class === 'needs-info') return 'needs-info';
      // 'permanent' is not retried
      return 'permanent';
    }

    // Unknown — treat as permanent (not retried)
    return 'permanent';
  }

  // -------------------------------------------------------------------------
  // Static: config loading
  // -------------------------------------------------------------------------

  /**
   * Load retry policy configuration from .mgw/config.json.
   *
   * Looks for a `retry_policies` section in the config file:
   * ```json
   * {
   *   "retry_policies": {
   *     "policies": {
   *       "timeout": 3,
   *       "malformed-output": 2
   *     },
   *     "backoff": {
   *       "baseMs": 10000
   *     },
   *     "agentOverrides": {
   *       "gsd-planner": { "timeout": 3 },
   *       "gsd-executor": { "malformed-output": 0 }
   *     }
   *   }
   * }
   * ```
   *
   * A missing file, unreadable file, invalid JSON or missing section all
   * yield an empty object; this method never throws.
   *
   * @param {string} [configPath] - Explicit path to config.json. If null,
   *   searches for .mgw/config.json relative to cwd.
   * @returns {{ policies?: object, backoff?: object, agentOverrides?: object }}
   */
  static loadConfig(configPath) {
    const empty = {};

    try {
      const cfgPath = configPath || path.join(process.cwd(), '.mgw', 'config.json');
      if (!fs.existsSync(cfgPath)) return empty;

      const raw = fs.readFileSync(cfgPath, 'utf-8');
      const config = JSON.parse(raw);

      if (!config || typeof config !== 'object') return empty;

      const section = config.retry_policies;
      if (!section || typeof section !== 'object') return empty;

      return {
        policies: section.policies && typeof section.policies === 'object' ? section.policies : undefined,
        backoff: section.backoff && typeof section.backoff === 'object' ? section.backoff : undefined,
        agentOverrides: section.agentOverrides && typeof section.agentOverrides === 'object' ? section.agentOverrides : undefined,
      };
    } catch (_) {
      // Config load failure is non-fatal — use defaults
      return empty;
    }
  }
}