-
-
Notifications
You must be signed in to change notification settings - Fork 8.9k
fix: harden hook telemetry handling for secrets and unsafe inputs #351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,7 +65,9 @@ source "${SKILL_ROOT}/scripts/detect-project.sh" | |
|
|
||
| CONFIG_DIR="${HOME}/.claude/homunculus" | ||
| OBSERVATIONS_FILE="${PROJECT_DIR}/observations.jsonl" | ||
| OBSERVATIONS_ARCHIVE_DIR="${PROJECT_DIR}/observations.archive" | ||
| MAX_FILE_SIZE_MB=10 | ||
| OBSERVATION_RETENTION_DAYS=30 | ||
|
|
||
| # Skip if disabled | ||
| if [ -f "$CONFIG_DIR/disabled" ]; then | ||
|
|
@@ -129,20 +131,35 @@ if [ "$PARSED_OK" != "True" ]; then | |
| timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") | ||
| export TIMESTAMP="$timestamp" | ||
| echo "$INPUT_JSON" | python3 -c " | ||
| import json, sys, os | ||
| import json, re, sys, os | ||
|
|
||
| raw = sys.stdin.read()[:2000] | ||
|
|
||
| patterns = [ | ||
| (re.compile(r'(?i)(authorization\s*[:=]\s*(?:bearer\s+)?)[^\s,}]+'), r'\1[REDACTED]'), | ||
| (re.compile(r'(?i)((?:api[_-]?key|token|password|secret|passwd|client[_-]?secret)\s*[:=]\s*)[^\s,}]+'), r'\1[REDACTED]'), | ||
| (re.compile(r'\bgh[pousr]_[A-Za-z0-9]{20,}\b'), '[REDACTED_GITHUB_TOKEN]'), | ||
| (re.compile(r'\bsk-[A-Za-z0-9]{20,}\b'), '[REDACTED_API_KEY]') | ||
| ] | ||
| for regex, replacement in patterns: | ||
| raw = regex.sub(replacement, raw) | ||
|
Comment on lines
+138
to
+145
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The scrubber misses the JSON form this hook actually writes. When Also applies to: 177-189 🤖 Prompt for AI Agents |
||
|
|
||
| print(json.dumps({'timestamp': os.environ['TIMESTAMP'], 'event': 'parse_error', 'raw': raw})) | ||
| " >> "$OBSERVATIONS_FILE" | ||
| exit 0 | ||
| fi | ||
|
|
||
| # Purge archived observation files older than retention window | ||
| mkdir -p "$OBSERVATIONS_ARCHIVE_DIR" | ||
| if command -v find >/dev/null 2>&1; then | ||
| find "$OBSERVATIONS_ARCHIVE_DIR" -type f -name 'observations-*.jsonl' -mtime +"$OBSERVATION_RETENTION_DAYS" -delete 2>/dev/null || true | ||
| fi | ||
|
|
||
| # Archive if file too large (atomic: rename with unique suffix to avoid race) | ||
| if [ -f "$OBSERVATIONS_FILE" ]; then | ||
| file_size_mb=$(du -m "$OBSERVATIONS_FILE" 2>/dev/null | cut -f1) | ||
| if [ "${file_size_mb:-0}" -ge "$MAX_FILE_SIZE_MB" ]; then | ||
| archive_dir="${PROJECT_DIR}/observations.archive" | ||
| mkdir -p "$archive_dir" | ||
| mv "$OBSERVATIONS_FILE" "$archive_dir/observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl" 2>/dev/null || true | ||
| mv "$OBSERVATIONS_FILE" "$OBSERVATIONS_ARCHIVE_DIR/observations-$(date +%Y%m%d-%H%M%S)-$$.jsonl" 2>/dev/null || true | ||
| fi | ||
| fi | ||
|
|
||
|
|
@@ -154,7 +171,23 @@ export PROJECT_NAME_ENV="$PROJECT_NAME" | |
| export TIMESTAMP="$timestamp" | ||
|
|
||
| echo "$PARSED" | python3 -c " | ||
| import json, sys, os | ||
| import json, re, sys, os | ||
|
|
||
|
|
||
| def scrub(value): | ||
| if value is None: | ||
| return None | ||
| text = str(value) | ||
| patterns = [ | ||
| (re.compile(r'(?i)(authorization\\s*[:=]\\s*(?:bearer\\s+)?)[^\\s,}]+'), r'\\1[REDACTED]'), | ||
| (re.compile(r'(?i)((?:api[_-]?key|token|password|secret|passwd|client[_-]?secret)\\s*[:=]\\s*)[^\\s,}]+'), r'\\1[REDACTED]'), | ||
| (re.compile(r'\\bgh[pousr]_[A-Za-z0-9]{20,}\\b'), '[REDACTED_GITHUB_TOKEN]'), | ||
| (re.compile(r'\\bsk-[A-Za-z0-9]{20,}\\b'), '[REDACTED_API_KEY]') | ||
| ] | ||
| for regex, replacement in patterns: | ||
| text = regex.sub(replacement, text) | ||
| return text | ||
|
|
||
|
|
||
| parsed = json.load(sys.stdin) | ||
| observation = { | ||
|
|
@@ -167,9 +200,9 @@ observation = { | |
| } | ||
|
|
||
| if parsed['input']: | ||
| observation['input'] = parsed['input'] | ||
| observation['input'] = scrub(parsed['input']) | ||
| if parsed['output'] is not None: | ||
| observation['output'] = parsed['output'] | ||
| observation['output'] = scrub(parsed['output']) | ||
|
|
||
| print(json.dumps(observation)) | ||
| " >> "$OBSERVATIONS_FILE" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,18 @@ _CLV2_HOMUNCULUS_DIR="${HOME}/.claude/homunculus" | |
| _CLV2_PROJECTS_DIR="${_CLV2_HOMUNCULUS_DIR}/projects" | ||
| _CLV2_REGISTRY_FILE="${_CLV2_HOMUNCULUS_DIR}/projects.json" | ||
|
|
||
| # Strip embedded credentials from HTTPS remote URLs before hashing/persisting, | ||
| # e.g. https://[email protected]/org/repo.git -> https://github.com/org/repo.git | ||
| _clv2_sanitize_remote_url() { | ||
| local raw_url="$1" | ||
| if [ -z "$raw_url" ]; then | ||
| printf '' | ||
| return 0 | ||
| fi | ||
|
|
||
| printf '%s' "$raw_url" | sed -E 's#(https?://)[^/@]+@#\1#' | ||
| } | ||
|
|
||
| _clv2_detect_project() { | ||
| local project_root="" | ||
| local project_name="" | ||
|
|
@@ -64,7 +76,10 @@ _clv2_detect_project() { | |
| fi | ||
| fi | ||
|
|
||
| local hash_input="${remote_url:-$project_root}" | ||
| local sanitized_remote_url="" | ||
| sanitized_remote_url=$(_clv2_sanitize_remote_url "$remote_url") | ||
|
|
||
| local hash_input="${sanitized_remote_url:-$project_root}" | ||
| # Use SHA256 via python3 (portable across macOS/Linux, no shasum/sha256sum divergence) | ||
|
Comment on lines
+79
to
83
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep These lines now hash the sanitized remote URL, but Also applies to: 107-108 🤖 Prompt for AI Agents |
||
| project_id=$(printf '%s' "$hash_input" | python3 -c "import sys,hashlib; print(hashlib.sha256(sys.stdin.buffer.read()).hexdigest()[:12])" 2>/dev/null) | ||
|
|
||
|
|
@@ -90,7 +105,7 @@ _clv2_detect_project() { | |
| mkdir -p "${_CLV2_PROJECT_DIR}/evolved/agents" | ||
|
|
||
| # Update project registry (lightweight JSON mapping) | ||
| _clv2_update_project_registry "$project_id" "$project_name" "$project_root" "$remote_url" | ||
| _clv2_update_project_registry "$project_id" "$project_name" "$project_root" "$sanitized_remote_url" | ||
| } | ||
|
|
||
| _clv2_update_project_registry() { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P1: The prefix-only allowlist doesn't prevent shell injection after the allowed command name. Because
execSyncruns through a shell, metacharacters like;,&&,|, or$()aftergit/node/etc. can execute arbitrary commands. Consider also rejecting shell metacharacters in the command string, e.g.:Alternatively, switch to
spawnSyncwith a parsed argument array to avoid shell interpretation entirely.Prompt for AI agents
Alternatively, switch to
@@ -331,16 +331,22 @@ function commandExists(cmd) { */ function runCommand(cmd, options = {}) { + const command = typeof cmd === 'string' ? cmd.trim() : ''; + const allowlistRegex = /^(git|node|npx|which|where)(\s|$)/; + + if (!allowlistRegex.test(command)) { ```spawnSyncwith a parsed argument array to avoid shell interpretation entirely.