Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion scripts/hooks/suggest-compact.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ async function main() {
// Track tool call count (increment in a temp file)
// Use a session-specific counter file based on session ID from environment
// or parent PID as fallback
const sessionId = process.env.CLAUDE_SESSION_ID || 'default';
const rawSessionId = process.env.CLAUDE_SESSION_ID || 'default';
const sessionId = String(rawSessionId).replace(/[^a-zA-Z0-9_-]/g, '') || 'default';
const counterFile = path.join(getTempDir(), `claude-tool-count-${sessionId}`);
const rawThreshold = parseInt(process.env.COMPACT_THRESHOLD || '50', 10);
const threshold = Number.isFinite(rawThreshold) && rawThreshold > 0 && rawThreshold <= 10000
Expand Down
14 changes: 10 additions & 4 deletions scripts/lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -331,16 +331,22 @@ function commandExists(cmd) {
/**
* Run a command and return output
*
* SECURITY NOTE: This function executes shell commands. Only use with
* trusted, hardcoded commands. Never pass user-controlled input directly.
* For user input, use spawnSync with argument arrays instead.
* SECURITY NOTE: This function executes shell commands. To reduce misuse risk,
* only a small allowlist of command prefixes is permitted.
*
* @param {string} cmd - Command to execute (should be trusted/hardcoded)
* @param {object} options - execSync options
*/
function runCommand(cmd, options = {}) {
const command = typeof cmd === 'string' ? cmd.trim() : '';
const allowlistRegex = /^(git|node|npx|which|where)(\s|$)/;
Copy link

@cubic-dev-ai cubic-dev-ai bot Mar 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: The prefix-only allowlist doesn't prevent shell injection after the allowed command name. Because execSync runs through a shell, metacharacters like ;, &&, |, or $() after git/node/etc. can execute arbitrary commands. Consider also rejecting shell metacharacters in the command string, e.g.:

if (/[;|&`$(){}]/.test(command)) {
  return { success: false, output: 'Command contains disallowed characters' };
}

Alternatively, switch to spawnSync with a parsed argument array to avoid shell interpretation entirely.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At scripts/lib/utils.js, line 342:

<comment>The prefix-only allowlist doesn't prevent shell injection *after* the allowed command name. Because `execSync` runs through a shell, metacharacters like `;`, `&&`, `|`, or `$()` after `git`/`node`/etc. can execute arbitrary commands. Consider also rejecting shell metacharacters in the command string, e.g.:

```js
if (/[;|&`$(){}]/.test(command)) {
  return { success: false, output: 'Command contains disallowed characters' };
}
```

Alternatively, switch to `spawnSync` with a parsed argument array to avoid shell interpretation entirely.</comment>

```diff
@@ -331,16 +331,22 @@ function commandExists(cmd) {
  */
 function runCommand(cmd, options = {}) {
+  const command = typeof cmd === 'string' ? cmd.trim() : '';
+  const allowlistRegex = /^(git|node|npx|which|where)(\s|$)/;
+
+  if (!allowlistRegex.test(command)) {
```
Fix with Cubic


if (!allowlistRegex.test(command)) {
return { success: false, output: `Command not allowed: ${command || '<empty>'}` };
}

try {
const result = execSync(cmd, {
const result = execSync(command, {
encoding: 'utf8',
stdio: ['pipe', 'pipe', 'pipe'],
...options
Expand Down
47 changes: 40 additions & 7 deletions skills/continuous-learning-v2/hooks/observe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ source "${SKILL_ROOT}/scripts/detect-project.sh"

CONFIG_DIR="${HOME}/.claude/homunculus"
OBSERVATIONS_FILE="${PROJECT_DIR}/observations.jsonl"
OBSERVATIONS_ARCHIVE_DIR="${PROJECT_DIR}/observations.archive"
MAX_FILE_SIZE_MB=10
OBSERVATION_RETENTION_DAYS=30

# Skip if disabled
if [ -f "$CONFIG_DIR/disabled" ]; then
Expand Down Expand Up @@ -129,20 +131,35 @@ if [ "$PARSED_OK" != "True" ]; then
timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
export TIMESTAMP="$timestamp"
echo "$INPUT_JSON" | python3 -c "
import json, sys, os
import json, re, sys, os

raw = sys.stdin.read()[:2000]

patterns = [
(re.compile(r'(?i)(authorization\s*[:=]\s*(?:bearer\s+)?)[^\s,}]+'), r'\1[REDACTED]'),
(re.compile(r'(?i)((?:api[_-]?key|token|password|secret|passwd|client[_-]?secret)\s*[:=]\s*)[^\s,}]+'), r'\1[REDACTED]'),
(re.compile(r'\bgh[pousr]_[A-Za-z0-9]{20,}\b'), '[REDACTED_GITHUB_TOKEN]'),
(re.compile(r'\bsk-[A-Za-z0-9]{20,}\b'), '[REDACTED_API_KEY]')
]
for regex, replacement in patterns:
raw = regex.sub(replacement, raw)
Comment on lines +138 to +145
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

The scrubber misses the JSON form this hook actually writes.

When tool_input or tool_output is an object, this script serializes it with json.dumps(...) first, so the common shape here is "Authorization": "Bearer ...", "api_key": "...", etc. These regexes only match unquoted authorization: / api_key= forms, so structured tool payloads will still be persisted to observations.jsonl unredacted. Redact known secret keys on the parsed object before serializing, or extend the patterns to handle quoted JSON keys and quoted values.

Also applies to: 177-189

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@skills/continuous-learning-v2/hooks/observe.sh` around lines 138 - 145, The
scrubber currently only applies regexes to unquoted text (patterns list applied
in the for loop to raw) so JSON-serialized tool_input/tool_output like
"Authorization": "Bearer ..." or "api_key": "..." remains unredacted; update the
hook to either (1) detect when tool_input/tool_output are dict/objects and
redact known secret keys (e.g., "authorization", "api_key", "token", "password",
"secret", "client_secret", "ghp_", "sk-") on the parsed object before calling
json.dumps, or (2) extend the existing regex patterns to also match quoted JSON
keys and quoted values (e.g., include patterns that match
"\"authorization\"\s*:\s*\"[^\"]+\"" and similar) and apply those augmented
patterns to raw; apply the same change to the other scrubber block referenced
around lines 177-189 so all serialized JSON observations are redacted
consistently.


print(json.dumps({'timestamp': os.environ['TIMESTAMP'], 'event': 'parse_error', 'raw': raw}))
" >> "$OBSERVATIONS_FILE"
exit 0
fi

# Purge archived observation files older than retention window
mkdir -p "$OBSERVATIONS_ARCHIVE_DIR"
if command -v find >/dev/null 2>&1; then
find "$OBSERVATIONS_ARCHIVE_DIR" -type f -name 'observations-*.jsonl' -mtime +"$OBSERVATION_RETENTION_DAYS" -delete 2>/dev/null || true
fi

# Archive if file too large (atomic: rename with unique suffix to avoid race)
if [ -f "$OBSERVATIONS_FILE" ]; then
file_size_mb=$(du -m "$OBSERVATIONS_FILE" 2>/dev/null | cut -f1)
if [ "${file_size_mb:-0}" -ge "$MAX_FILE_SIZE_MB" ]; then
archive_dir="${PROJECT_DIR}/observations.archive"
mkdir -p "$archive_dir"
mv "$OBSERVATIONS_FILE" "$archive_dir/observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl" 2>/dev/null || true
mv "$OBSERVATIONS_FILE" "$OBSERVATIONS_ARCHIVE_DIR/observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl" 2>/dev/null || true
fi
fi

Expand All @@ -154,7 +171,23 @@ export PROJECT_NAME_ENV="$PROJECT_NAME"
export TIMESTAMP="$timestamp"

echo "$PARSED" | python3 -c "
import json, sys, os
import json, re, sys, os


def scrub(value):
if value is None:
return None
text = str(value)
patterns = [
(re.compile(r'(?i)(authorization\\s*[:=]\\s*(?:bearer\\s+)?)[^\\s,}]+'), r'\\1[REDACTED]'),
(re.compile(r'(?i)((?:api[_-]?key|token|password|secret|passwd|client[_-]?secret)\\s*[:=]\\s*)[^\\s,}]+'), r'\\1[REDACTED]'),
(re.compile(r'\\bgh[pousr]_[A-Za-z0-9]{20,}\\b'), '[REDACTED_GITHUB_TOKEN]'),
(re.compile(r'\\bsk-[A-Za-z0-9]{20,}\\b'), '[REDACTED_API_KEY]')
]
for regex, replacement in patterns:
text = regex.sub(replacement, text)
return text


parsed = json.load(sys.stdin)
observation = {
Expand All @@ -167,9 +200,9 @@ observation = {
}

if parsed['input']:
observation['input'] = parsed['input']
observation['input'] = scrub(parsed['input'])
if parsed['output'] is not None:
observation['output'] = parsed['output']
observation['output'] = scrub(parsed['output'])

print(json.dumps(observation))
" >> "$OBSERVATIONS_FILE"
Expand Down
19 changes: 17 additions & 2 deletions skills/continuous-learning-v2/scripts/detect-project.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ _CLV2_HOMUNCULUS_DIR="${HOME}/.claude/homunculus"
_CLV2_PROJECTS_DIR="${_CLV2_HOMUNCULUS_DIR}/projects"
_CLV2_REGISTRY_FILE="${_CLV2_HOMUNCULUS_DIR}/projects.json"

# Strip embedded credentials (the userinfo part) from HTTP(S) remote URLs
# before hashing/persisting, e.g.
#   https://<token>@github.com/org/repo.git -> https://github.com/org/repo.git
#
# Arguments:
#   $1 - raw remote URL; may be empty or omitted
# Outputs:
#   The sanitized URL on stdout, without a trailing newline. Empty input
#   produces empty output. URLs without an http(s) userinfo part (including
#   scp-style git@host:path remotes) are printed unchanged.
_clv2_sanitize_remote_url() {
  # ${1:-} keeps the function usable when the caller runs under `set -u`.
  local raw_url="${1:-}"
  if [ -z "$raw_url" ]; then
    printf ''
    return 0
  fi

  # [^/]* is greedy, so it consumes everything up to the LAST '@' that appears
  # before the first '/' after the scheme. This removes the whole userinfo even
  # when a password contains an unescaped '@', and also handles an empty
  # userinfo ("https://@host/..."). An '@' later in the path is never touched
  # because the match cannot cross a '/'.
  printf '%s' "$raw_url" | sed -E 's#(https?://)[^/]*@#\1#'
}

_clv2_detect_project() {
local project_root=""
local project_name=""
Expand Down Expand Up @@ -64,7 +76,10 @@ _clv2_detect_project() {
fi
fi

local hash_input="${remote_url:-$project_root}"
local sanitized_remote_url=""
sanitized_remote_url=$(_clv2_sanitize_remote_url "$remote_url")

local hash_input="${sanitized_remote_url:-$project_root}"
# Use SHA256 via python3 (portable across macOS/Linux, no shasum/sha256sum divergence)
Comment on lines +79 to 83
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Keep project_id hashing consistent with the Python path.

These lines now hash the sanitized remote URL, but skills/continuous-learning-v2/scripts/instinct-cli.py:138-160 still hashes the raw remote_url and skills/continuous-learning-v2/scripts/instinct-cli.py:170-190 writes registry entries under that raw-hash-derived ID. That splits one repo into two project IDs depending on whether the Bash hook or the Python CLI touched it, and it also strands existing state created from the old raw hash. Update both paths together or add a migration before changing the hash source here.

Also applies to: 107-108

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@skills/continuous-learning-v2/scripts/detect-project.sh` around lines 79 -
83, The change makes detect-project.sh compute hash_input from
sanitized_remote_url (via _clv2_sanitize_remote_url) but instinct-cli.py still
hashes the raw remote_url and writes registry entries under that raw-derived
project_id, causing split IDs; fix by making both sides consistent: either
revert detect-project.sh to use the raw remote_url for hash_input, or
(preferable) update instinct-cli.py hashing to sanitize the remote_url with the
same logic used in _clv2_sanitize_remote_url before computing project_id and
when writing registry entries (see the project_id/hash routine in
instinct-cli.py around the project hashing and registry write code), and add a
one-time migration that maps existing raw-hash registry entries to the new
sanitized-hash IDs to avoid stranded state.

project_id=$(printf '%s' "$hash_input" | python3 -c "import sys,hashlib; print(hashlib.sha256(sys.stdin.buffer.read()).hexdigest()[:12])" 2>/dev/null)

Expand All @@ -90,7 +105,7 @@ _clv2_detect_project() {
mkdir -p "${_CLV2_PROJECT_DIR}/evolved/agents"

# Update project registry (lightweight JSON mapping)
_clv2_update_project_registry "$project_id" "$project_name" "$project_root" "$remote_url"
_clv2_update_project_registry "$project_id" "$project_name" "$project_root" "$sanitized_remote_url"
}

_clv2_update_project_registry() {
Expand Down
24 changes: 23 additions & 1 deletion tests/hooks/suggest-compact.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,12 @@ function runCompact(envOverrides = {}) {
/**
 * Reduce a session ID to characters that are safe inside a file name.
 *
 * Uses the same rule as the hook under test (scripts/hooks/suggest-compact.js):
 * every character outside [a-zA-Z0-9_-] is dropped, and an empty result falls
 * back to 'default'.
 *
 * @param {*} sessionId - Raw session ID; falsy values are treated as 'default'.
 * @returns {string} Sanitized session ID, never empty.
 */
function sanitizeSessionId(sessionId) {
  const rawId = String(sessionId || 'default');
  const safeId = rawId.replace(/[^a-zA-Z0-9_-]/g, '');
  // An ID made only of unsafe characters (e.g. '///') sanitizes to '', so
  // fall back to the shared default name instead of producing a bare prefix.
  return safeId || 'default';
}

/**
 * Get the counter file path for a given session ID.
 *
 * The ID is sanitized first so the resulting path always stays directly inside
 * the OS temp directory, even for IDs containing path separators or '..'.
 *
 * @param {*} sessionId - Raw session ID as passed via CLAUDE_SESSION_ID.
 * @returns {string} Path of the counter file inside os.tmpdir().
 */
function getCounterFilePath(sessionId) {
  return path.join(os.tmpdir(), `claude-tool-count-${sanitizeSessionId(sessionId)}`);
}

function runTests() {
Expand Down Expand Up @@ -366,6 +370,24 @@ function runTests() {
})) passed++;
else failed++;

if (test('sanitizes unsafe CLAUDE_SESSION_ID characters in counter filename', () => {
const unsafeSession = '../bad/session:?*';
const sanitizedCounterFile = getCounterFilePath(unsafeSession);
try { fs.unlinkSync(sanitizedCounterFile); } catch (_err) { /* ignore */ }
try {
const result = runCompact({ CLAUDE_SESSION_ID: unsafeSession });
assert.strictEqual(result.code, 0, 'Should exit 0');
assert.ok(fs.existsSync(sanitizedCounterFile), 'Counter file should be created with sanitized session ID');
assert.ok(
path.dirname(sanitizedCounterFile) === os.tmpdir(),
'Sanitized counter file should remain in temp directory'
);
} finally {
try { fs.unlinkSync(sanitizedCounterFile); } catch (_err) { /* ignore */ }
}
})) passed++;
else failed++;

// Summary
console.log(`
Results: Passed: ${passed}, Failed: ${failed}`);
Expand Down
6 changes: 6 additions & 0 deletions tests/lib/utils.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,12 @@ function runTests() {
assert.strictEqual(result.success, false);
})) passed++; else failed++;

if (test('runCommand blocks disallowed command prefixes', () => {
const result = utils.runCommand('echo hello');
assert.strictEqual(result.success, false);
assert.ok(result.output.includes('Command not allowed'));
})) passed++; else failed++;

// output() and log() tests
console.log('\noutput() and log():');

Expand Down