Harden hooks and observation logging against unsafe input

* scripts/hooks/suggest-compact.js: sanitize CLAUDE_SESSION_ID before it is
  used in the counter file name.
* scripts/lib/utils.js: runCommand permits only a small allowlist of command
  prefixes and rejects shell chaining / substitution operators, because
  execSync hands the string to a shell and a prefix check alone would let
  "git status; <anything>" through.
* skills/continuous-learning-v2/hooks/observe.sh: redact credentials before
  persisting observations, including JSON-quoted key/value pairs, and purge
  archived observation files older than the retention window.
* skills/continuous-learning-v2/scripts/detect-project.sh: strip embedded
  credentials from HTTPS remote URLs before hashing/persisting.
* tests: cover session-id sanitization and both runCommand rejections.

Review note: the blob ids on the "index" lines predate the review edits made
to utils.js, observe.sh and the two test files. Indentation of context lines
was reconstructed during review; if a hunk does not match, apply with
"git apply --ignore-whitespace".

diff --git a/scripts/hooks/suggest-compact.js b/scripts/hooks/suggest-compact.js
index 81acc53e1..4986ef4fa 100644
--- a/scripts/hooks/suggest-compact.js
+++ b/scripts/hooks/suggest-compact.js
@@ -25,7 +25,8 @@ async function main() {
   // Track tool call count (increment in a temp file)
   // Use a session-specific counter file based on session ID from environment
   // or parent PID as fallback
-  const sessionId = process.env.CLAUDE_SESSION_ID || 'default';
+  const rawSessionId = process.env.CLAUDE_SESSION_ID || 'default';
+  const sessionId = String(rawSessionId).replace(/[^a-zA-Z0-9_-]/g, '') || 'default';
   const counterFile = path.join(getTempDir(), `claude-tool-count-${sessionId}`);
   const rawThreshold = parseInt(process.env.COMPACT_THRESHOLD || '50', 10);
   const threshold = Number.isFinite(rawThreshold) && rawThreshold > 0 && rawThreshold <= 10000
diff --git a/scripts/lib/utils.js b/scripts/lib/utils.js
index 4c4975463..2e040d3bb 100644
--- a/scripts/lib/utils.js
+++ b/scripts/lib/utils.js
@@ -331,16 +331,26 @@ function commandExists(cmd) {
 /**
  * Run a command and return output
  *
- * SECURITY NOTE: This function executes shell commands. Only use with
- * trusted, hardcoded commands. Never pass user-controlled input directly.
- * For user input, use spawnSync with argument arrays instead.
+ * SECURITY NOTE: This function executes shell commands. To reduce misuse risk,
+ * only a small allowlist of command prefixes is permitted, and commands that
+ * contain shell chaining or substitution operators are rejected.
  *
  * @param {string} cmd - Command to execute (should be trusted/hardcoded)
  * @param {object} options - execSync options
  */
 function runCommand(cmd, options = {}) {
+  const command = typeof cmd === 'string' ? cmd.trim() : '';
+  const allowlistRegex = /^(git|node|npx|which|where)(\s|$)/;
+  // execSync runs through a shell, so an allowed prefix alone is not enough:
+  // "git status; <anything>" would still execute the chained command.
+  const shellChainRegex = /[;|`\r\n]|\$\(|(?<!>)&/;
+
+  if (!allowlistRegex.test(command) || shellChainRegex.test(command)) {
+    return { success: false, output: `Command not allowed: ${command}` };
+  }
+
   try {
-    const result = execSync(cmd, {
+    const result = execSync(command, {
       encoding: 'utf8',
       stdio: ['pipe', 'pipe', 'pipe'],
       ...options
diff --git a/skills/continuous-learning-v2/hooks/observe.sh b/skills/continuous-learning-v2/hooks/observe.sh
index 7f78f8018..ca80dfe1e 100755
--- a/skills/continuous-learning-v2/hooks/observe.sh
+++ b/skills/continuous-learning-v2/hooks/observe.sh
@@ -65,7 +65,9 @@ source "${SKILL_ROOT}/scripts/detect-project.sh"

 CONFIG_DIR="${HOME}/.claude/homunculus"
 OBSERVATIONS_FILE="${PROJECT_DIR}/observations.jsonl"
+OBSERVATIONS_ARCHIVE_DIR="${PROJECT_DIR}/observations.archive"
 MAX_FILE_SIZE_MB=10
+OBSERVATION_RETENTION_DAYS=30

 # Skip if disabled
 if [ -f "$CONFIG_DIR/disabled" ]; then
@@ -129,20 +131,36 @@ if [ "$PARSED_OK" != "True" ]; then
   timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
   export TIMESTAMP="$timestamp"
   echo "$INPUT_JSON" | python3 -c "
-import json, sys, os
+import json, re, sys, os
+
 raw = sys.stdin.read()[:2000]
+
+# \x22 and \x27 are the two quote characters: keys and values may be quoted, as in JSON input.
+patterns = [
+    (re.compile(r'(?i)(authorization[\x22\x27]?\s*[:=]\s*[\x22\x27]?(?:bearer\s+)?)[^\s,}\x22\x27]+'), r'\1[REDACTED]'),
+    (re.compile(r'(?i)((?:api[_-]?key|token|password|secret|passwd|client[_-]?secret)[\x22\x27]?\s*[:=]\s*[\x22\x27]?)[^\s,}\x22\x27]+'), r'\1[REDACTED]'),
+    (re.compile(r'\bgh[pousr]_[A-Za-z0-9]{20,}\b'), '[REDACTED_GITHUB_TOKEN]'),
+    (re.compile(r'\bsk-[A-Za-z0-9]{20,}\b'), '[REDACTED_API_KEY]')
+]
+for regex, replacement in patterns:
+    raw = regex.sub(replacement, raw)
+
 print(json.dumps({'timestamp': os.environ['TIMESTAMP'], 'event': 'parse_error', 'raw': raw}))
 " >> "$OBSERVATIONS_FILE"
   exit 0
 fi

+# Purge archived observation files older than retention window
+mkdir -p "$OBSERVATIONS_ARCHIVE_DIR"
+if command -v find >/dev/null 2>&1; then
+  find "$OBSERVATIONS_ARCHIVE_DIR" -type f -name 'observations-*.jsonl' -mtime +"$OBSERVATION_RETENTION_DAYS" -delete 2>/dev/null || true
+fi
+
 # Archive if file too large (atomic: rename with unique suffix to avoid race)
 if [ -f "$OBSERVATIONS_FILE" ]; then
   file_size_mb=$(du -m "$OBSERVATIONS_FILE" 2>/dev/null | cut -f1)
   if [ "${file_size_mb:-0}" -ge "$MAX_FILE_SIZE_MB" ]; then
-    archive_dir="${PROJECT_DIR}/observations.archive"
-    mkdir -p "$archive_dir"
-    mv "$OBSERVATIONS_FILE" "$archive_dir/observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl" 2>/dev/null || true
+    mv "$OBSERVATIONS_FILE" "$OBSERVATIONS_ARCHIVE_DIR/observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl" 2>/dev/null || true
   fi
 fi

@@ -154,7 +172,24 @@ export PROJECT_NAME_ENV="$PROJECT_NAME"
 export TIMESTAMP="$timestamp"

 echo "$PARSED" | python3 -c "
-import json, sys, os
+import json, re, sys, os
+
+
+def scrub(value):
+    if value is None:
+        return None
+    text = str(value)
+    # \\x22 and \\x27 are the two quote characters: keys and values may be quoted, as in JSON input.
+    patterns = [
+        (re.compile(r'(?i)(authorization[\\x22\\x27]?\\s*[:=]\\s*[\\x22\\x27]?(?:bearer\\s+)?)[^\\s,}\\x22\\x27]+'), r'\\1[REDACTED]'),
+        (re.compile(r'(?i)((?:api[_-]?key|token|password|secret|passwd|client[_-]?secret)[\\x22\\x27]?\\s*[:=]\\s*[\\x22\\x27]?)[^\\s,}\\x22\\x27]+'), r'\\1[REDACTED]'),
+        (re.compile(r'\\bgh[pousr]_[A-Za-z0-9]{20,}\\b'), '[REDACTED_GITHUB_TOKEN]'),
+        (re.compile(r'\\bsk-[A-Za-z0-9]{20,}\\b'), '[REDACTED_API_KEY]')
+    ]
+    for regex, replacement in patterns:
+        text = regex.sub(replacement, text)
+    return text
+

 parsed = json.load(sys.stdin)
 observation = {
@@ -167,9 +202,9 @@ observation = {
 }

 if parsed['input']:
-    observation['input'] = parsed['input']
+    observation['input'] = scrub(parsed['input'])
 if parsed['output'] is not None:
-    observation['output'] = parsed['output']
+    observation['output'] = scrub(parsed['output'])

 print(json.dumps(observation))
 " >> "$OBSERVATIONS_FILE"
diff --git a/skills/continuous-learning-v2/scripts/detect-project.sh b/skills/continuous-learning-v2/scripts/detect-project.sh
index 31703a218..ae2678e8c 100755
--- a/skills/continuous-learning-v2/scripts/detect-project.sh
+++ b/skills/continuous-learning-v2/scripts/detect-project.sh
@@ -23,6 +23,18 @@ _CLV2_HOMUNCULUS_DIR="${HOME}/.claude/homunculus"
 _CLV2_PROJECTS_DIR="${_CLV2_HOMUNCULUS_DIR}/projects"
 _CLV2_REGISTRY_FILE="${_CLV2_HOMUNCULUS_DIR}/projects.json"

+# Strip embedded credentials from HTTPS remote URLs before hashing/persisting,
+# e.g. https://ghp_xxx@github.com/org/repo.git -> https://github.com/org/repo.git
+_clv2_sanitize_remote_url() {
+  local raw_url="$1"
+  if [ -z "$raw_url" ]; then
+    printf ''
+    return 0
+  fi
+
+  printf '%s' "$raw_url" | sed -E 's#(https?://)[^/@]+@#\1#'
+}
+
 _clv2_detect_project() {
   local project_root=""
   local project_name=""
@@ -64,7 +76,10 @@ _clv2_detect_project() {
     fi
   fi

-  local hash_input="${remote_url:-$project_root}"
+  local sanitized_remote_url=""
+  sanitized_remote_url=$(_clv2_sanitize_remote_url "$remote_url")
+
+  local hash_input="${sanitized_remote_url:-$project_root}"
   # Use SHA256 via python3 (portable across macOS/Linux, no shasum/sha256sum divergence)
   project_id=$(printf '%s' "$hash_input" | python3 -c "import sys,hashlib; print(hashlib.sha256(sys.stdin.buffer.read()).hexdigest()[:12])" 2>/dev/null)

@@ -90,7 +105,7 @@ _clv2_detect_project() {
   mkdir -p "${_CLV2_PROJECT_DIR}/evolved/agents"

   # Update project registry (lightweight JSON mapping)
-  _clv2_update_project_registry "$project_id" "$project_name" "$project_root" "$remote_url"
+  _clv2_update_project_registry "$project_id" "$project_name" "$project_root" "$sanitized_remote_url"
 }

 _clv2_update_project_registry() {
diff --git a/tests/hooks/suggest-compact.test.js b/tests/hooks/suggest-compact.test.js
index 36dd8b176..b504a1a42 100644
--- a/tests/hooks/suggest-compact.test.js
+++ b/tests/hooks/suggest-compact.test.js
@@ -50,8 +50,16 @@ function runCompact(envOverrides = {}) {
 /**
- * Get the counter file path for a given session ID.
+ * Mirror the hook's CLAUDE_SESSION_ID sanitization so tests resolve the same
+ * counter file name the hook writes.
  */
+function sanitizeSessionId(sessionId) {
+  return String(sessionId || 'default').replace(/[^a-zA-Z0-9_-]/g, '') || 'default';
+}
+
+/**
+ * Get the counter file path for a given session ID.
+ */
 function getCounterFilePath(sessionId) {
-  return path.join(os.tmpdir(), `claude-tool-count-${sessionId}`);
+  return path.join(os.tmpdir(), `claude-tool-count-${sanitizeSessionId(sessionId)}`);
 }

 function runTests() {
@@ -366,6 +374,24 @@ function runTests() {
   })) passed++;
   else failed++;

+  if (test('sanitizes unsafe CLAUDE_SESSION_ID characters in counter filename', () => {
+    const unsafeSession = '../bad/session:?*';
+    const sanitizedCounterFile = getCounterFilePath(unsafeSession);
+    try { fs.unlinkSync(sanitizedCounterFile); } catch (_err) { /* ignore */ }
+    try {
+      const result = runCompact({ CLAUDE_SESSION_ID: unsafeSession });
+      assert.strictEqual(result.code, 0, 'Should exit 0');
+      assert.ok(fs.existsSync(sanitizedCounterFile), 'Counter file should be created with sanitized session ID');
+      assert.ok(
+        path.dirname(sanitizedCounterFile) === os.tmpdir(),
+        'Sanitized counter file should remain in temp directory'
+      );
+    } finally {
+      try { fs.unlinkSync(sanitizedCounterFile); } catch (_err) { /* ignore */ }
+    }
+  })) passed++;
+  else failed++;
+
   // Summary
   console.log(`
 Results: Passed: ${passed}, Failed: ${failed}`);
diff --git a/tests/lib/utils.test.js b/tests/lib/utils.test.js
index 6a7c4125a..60c61c802 100644
--- a/tests/lib/utils.test.js
+++ b/tests/lib/utils.test.js
@@ -397,6 +397,18 @@ function runTests() {
     assert.strictEqual(result.success, false);
   })) passed++; else failed++;

+  if (test('runCommand blocks disallowed command prefixes', () => {
+    const result = utils.runCommand('echo hello');
+    assert.strictEqual(result.success, false);
+    assert.ok(result.output.includes('Command not allowed'));
+  })) passed++; else failed++;
+
+  if (test('runCommand blocks shell chaining after an allowed prefix', () => {
+    const result = utils.runCommand('git --version; echo chained');
+    assert.strictEqual(result.success, false);
+    assert.ok(result.output.includes('Command not allowed'));
+  })) passed++; else failed++;
+
   // output() and log() tests
   console.log('\noutput() and log():');
