-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Masking in diagnostics package #8567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,13 +5,14 @@ | |
| use crate::providers::utils::LOGS_TO_KEEP; | ||
| use crate::session::SessionManager; | ||
| use serde::{Deserialize, Serialize}; | ||
| use std::collections::HashMap; | ||
| use std::fs; | ||
| use std::io::Cursor; | ||
| use std::io::Write; | ||
| use std::path::PathBuf; | ||
| use utoipa::ToSchema; | ||
| use zip::write::SimpleFileOptions; | ||
| use zip::ZipWriter; | ||
| use zip::write::SimpleFileOptions; | ||
|
|
||
| #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] | ||
| pub struct SystemInfo { | ||
|
|
@@ -134,6 +135,142 @@ | |
| entries.last().map(|e| e.path()) | ||
| } | ||
|
|
||
| /// Shannon entropy in bits per character. | ||
| fn shannon_entropy(s: &str) -> f64 { | ||
| let len = s.len() as f64; | ||
| if len == 0.0 { | ||
| return 0.0; | ||
| } | ||
| let mut counts: HashMap<u8, usize> = HashMap::new(); | ||
| for &b in s.as_bytes() { | ||
| *counts.entry(b).or_default() += 1; | ||
| } | ||
| counts | ||
| .values() | ||
| .map(|&c| { | ||
| let p = c as f64 / len; | ||
| -p * p.log2() | ||
| }) | ||
| .sum() | ||
| } | ||
|
|
||
| const ENTROPY_THRESHOLD: f64 = 3.5; | ||
| const MIN_SECRET_LEN: usize = 20; | ||
|
|
||
| /// Returns true if `token` looks like a secret based on length, entropy, and | ||
| /// character composition. Secrets (API keys, tokens) are long, high-entropy | ||
| /// strings composed almost entirely of alphanumeric chars, hyphens, and | ||
| /// underscores. Non-secrets like URLs, paths, model names, and descriptions | ||
| /// contain structural characters (slashes, spaces, colons, etc.). | ||
| /// | ||
| /// Dotted tokens get special handling: JWT-shaped strings (three long base64 | ||
| /// segments separated by dots) are treated as secrets, while hostnames and | ||
| /// version numbers are not. | ||
| fn looks_like_secret(token: &str) -> bool { | ||
| if token.len() < MIN_SECRET_LEN { | ||
| return false; | ||
| } | ||
| if shannon_entropy(token) <= ENTROPY_THRESHOLD { | ||
| return false; | ||
| } | ||
| // Anything with URL, path, or natural-language characters is not a secret. | ||
| if token.bytes().any(|b| { | ||
| matches!( | ||
| b, | ||
| b' ' | b'/' | ||
| | b':' | ||
| | b'@' | ||
|
Comment on lines
+180
to
+182
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The Useful? React with 👍 / 👎. |
||
| | b',' | ||
| | b';' | ||
| | b'!' | ||
| | b'?' | ||
| | b'(' | ||
| | b')' | ||
| | b'[' | ||
| | b']' | ||
| | b'{' | ||
| | b'}' | ||
| | b'=' | ||
| | b'&' | ||
| | b'+' | ||
|
Comment on lines
+193
to
+195
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
| | b'#' | ||
| ) | ||
| }) { | ||
| return false; | ||
| } | ||
| // Dots need special handling: JWTs (header.payload.signature) are secrets, | ||
| // but hostnames (api.openai.com) and versions (6.2.9200) are not. | ||
| if token.contains('.') { | ||
| let parts: Vec<&str> = token.split('.').collect(); | ||
| return parts.len() == 3 && parts.iter().all(|p| p.len() >= 4); | ||
|
Comment on lines
+203
to
+205
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The new dotted-token branch returns Useful? React with 👍 / 👎. |
||
| } | ||
| true | ||
| } | ||
|
|
||
| /// Redact high-entropy tokens that look like secrets from arbitrary text. | ||
| /// | ||
| /// For `key: value` lines (YAML-style), checks the whole value and individual | ||
| /// words within it. For JSON, checks quoted string values. Key names are always | ||
| /// preserved for debugging. | ||
| #[allow(clippy::string_slice)] // All splits are on ASCII delimiters ('"', ": ") — safe. | ||
| fn redact_secrets(text: &str) -> String { | ||
| text.lines() | ||
| .map(|line| { | ||
| // YAML-style "KEY: value" — check the value and its individual words | ||
| if let Some(colon_pos) = line.find(": ") { | ||
| let key_part = &line[..colon_pos + 2]; | ||
| let value_part = line[colon_pos + 2..].trim(); | ||
| if looks_like_secret(value_part) { | ||
| return format!("{}[REDACTED]", key_part); | ||
| } | ||
| // Check individual words (handles "Bearer <token>" etc.) | ||
| let words: Vec<&str> = value_part.split_whitespace().collect(); | ||
| if words.iter().any(|w| looks_like_secret(w)) { | ||
| let redacted_words: Vec<String> = words | ||
| .iter() | ||
| .map(|w| { | ||
| if looks_like_secret(w) { | ||
| "[REDACTED]".to_string() | ||
| } else { | ||
| w.to_string() | ||
| } | ||
| }) | ||
| .collect(); | ||
| return format!("{}{}", key_part, redacted_words.join(" ")); | ||
| } | ||
| } | ||
|
|
||
| // JSON-style — redact quoted high-entropy values | ||
| let mut result = String::with_capacity(line.len()); | ||
| let mut rest = line; | ||
| while !rest.is_empty() { | ||
| if let Some(q_start) = rest.find('"') { | ||
| result.push_str(&rest[..q_start + 1]); | ||
| let after_quote = &rest[q_start + 1..]; | ||
| if let Some(q_end) = after_quote.find('"') { | ||
| let inner = &after_quote[..q_end]; | ||
| if looks_like_secret(inner) { | ||
| result.push_str("[REDACTED]"); | ||
| } else { | ||
| result.push_str(inner); | ||
| } | ||
| result.push('"'); | ||
| rest = &after_quote[q_end + 1..]; | ||
| } else { | ||
| result.push_str(after_quote); | ||
| break; | ||
| } | ||
| } else { | ||
| result.push_str(rest); | ||
| break; | ||
| } | ||
| } | ||
| result | ||
| }) | ||
| .collect::<Vec<_>>() | ||
| .join("\n") | ||
| } | ||
|
|
||
| pub async fn generate_diagnostics( | ||
| session_manager: &SessionManager, | ||
| session_id: &str, | ||
|
|
@@ -162,24 +299,29 @@ | |
| let path = entry.path(); | ||
| let name = path.file_name().unwrap().to_str().unwrap(); | ||
| zip.start_file(format!("logs/{}", name), options)?; | ||
| zip.write_all(&fs::read(&path)?)?; | ||
| let bytes = fs::read(&path)?; | ||
| let content = String::from_utf8_lossy(&bytes); | ||
| zip.write_all(redact_secrets(&content).as_bytes())?; | ||
| } | ||
|
|
||
| if let Some(server_log) = latest_server_log_path() { | ||
| if let Ok(content) = fs::read(&server_log) { | ||
| if let Ok(bytes) = fs::read(&server_log) { | ||
| let name = server_log.file_name().unwrap().to_str().unwrap(); | ||
| zip.start_file(format!("logs/server/{}", name), options)?; | ||
| zip.write_all(&content)?; | ||
| let content = String::from_utf8_lossy(&bytes); | ||
| zip.write_all(redact_secrets(&content).as_bytes())?; | ||
| } | ||
| } | ||
|
|
||
| let session_data = session_manager.export_session(session_id).await?; | ||
| zip.start_file("session.json", options)?; | ||
| zip.write_all(session_data.as_bytes())?; | ||
| zip.write_all(redact_secrets(&session_data).as_bytes())?; | ||
|
|
||
| if config_path.exists() { | ||
| let bytes = fs::read(&config_path)?; | ||
| let content = String::from_utf8_lossy(&bytes); | ||
| zip.start_file("config.yaml", options)?; | ||
| zip.write_all(&fs::read(&config_path)?)?; | ||
| zip.write_all(redact_secrets(&content).as_bytes())?; | ||
| } | ||
|
|
||
| zip.start_file("system.txt", options)?; | ||
|
|
@@ -215,3 +357,116 @@ | |
|
|
||
| Ok(buffer) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
||
| #[test] | ||
| fn redacts_api_key_in_yaml() { | ||
| let input = "OPENROUTER_API_KEY: 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"; | ||
| let output = redact_secrets(input); | ||
| assert_eq!(output, "OPENROUTER_API_KEY: [REDACTED]"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn redacts_api_key_in_json() { | ||
| let input = r#"{"key": "OPENROUTER_API_KEY", "value": "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"}"#; | ||
| let output = redact_secrets(input); | ||
| assert!(output.contains("[REDACTED]"), "secret should be redacted"); | ||
| assert!( | ||
| !output.contains("6c82424c"), | ||
| "raw key material must not appear" | ||
| ); | ||
| assert!( | ||
| output.contains("OPENROUTER_API_KEY"), | ||
| "key name should survive" | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn preserves_normal_config_values() { | ||
| let input = "GOOSE_PROVIDER: openrouter\nGOOSE_MODEL: openai/gpt-4o\nOPENROUTER_HOST: https://openrouter.ai"; | ||
| assert_eq!(redact_secrets(input), input); | ||
| } | ||
|
|
||
| #[test] | ||
| fn preserves_short_values() { | ||
| let input = "GOOSE_MODE: auto\nenabled: true"; | ||
| assert_eq!(redact_secrets(input), input); | ||
| } | ||
|
|
||
| #[test] | ||
| fn preserves_model_names() { | ||
| let input = "model: anthropic/claude-sonnet-4\nfast: google/gemini-2.5-flash"; | ||
| assert_eq!(redact_secrets(input), input); | ||
| } | ||
|
|
||
| #[test] | ||
| fn preserves_descriptions() { | ||
| let input = "description: Write and edit files, and execute shell commands"; | ||
| assert_eq!(redact_secrets(input), input); | ||
| } | ||
|
|
||
| #[test] | ||
| fn preserves_urls() { | ||
| let input = "host: https://api.openai.com\nreferer: https://goose-docs.ai"; | ||
| assert_eq!(redact_secrets(input), input); | ||
| } | ||
|
|
||
| #[test] | ||
| fn redacts_bearer_token_in_yaml_value() { | ||
| let input = "Authorization: Bearer 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"; | ||
| let output = redact_secrets(input); | ||
| // The YAML-style "key: value" check fires on "Authorization: Bearer sk-..." | ||
| // The bearer value itself is the high-entropy part | ||
| assert!(!output.contains("6c82424c"), "raw key must not appear"); | ||
| } | ||
|
|
||
| #[test] | ||
| fn redacts_multiple_secrets_preserves_normal() { | ||
| let input = "\ | ||
| OPENAI_API_KEY: sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx234yz\n\ | ||
| GOOSE_PROVIDER: openrouter\n\ | ||
| ANTHROPIC_API_KEY: sk-ant-api03-xyzabc123def456ghi789jkl012mno345pqr678stu901vwx"; | ||
| let output = redact_secrets(input); | ||
| assert!(output.contains("OPENAI_API_KEY: [REDACTED]")); | ||
| assert!(output.contains("GOOSE_PROVIDER: openrouter")); | ||
| assert!(output.contains("ANTHROPIC_API_KEY: [REDACTED]")); | ||
| } | ||
|
|
||
| #[test] | ||
| fn detects_jwt_tokens() { | ||
| assert!(looks_like_secret( | ||
| "eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123signature456xyz" | ||
| )); | ||
| } | ||
|
|
||
| #[test] | ||
| fn preserves_hostnames_and_versions() { | ||
| assert!(!looks_like_secret("api.openai.com")); | ||
| assert!(!looks_like_secret("openrouter.ai")); | ||
| assert!(!looks_like_secret("6.2.9200")); | ||
| } | ||
|
|
||
| #[test] | ||
| fn secret_detection_accuracy() { | ||
| // Real API keys: detected | ||
| assert!(looks_like_secret( | ||
| "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" | ||
| )); | ||
| assert!(looks_like_secret( | ||
| "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx234yz" | ||
| )); | ||
|
|
||
| // Normal values: not detected | ||
| assert!(!looks_like_secret("openrouter")); | ||
| assert!(!looks_like_secret("https://openrouter.ai")); | ||
| assert!(!looks_like_secret("anthropic/claude-sonnet-4")); | ||
| assert!(!looks_like_secret("google/gemini-2.5-flash")); | ||
| assert!(!looks_like_secret( | ||
| "Write and edit files, and execute shell commands" | ||
| )); | ||
| assert!(!looks_like_secret("OPENROUTER_API_KEY")); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing the
if: github.event_name != 'workflow_dispatch'guard causes manualworkflow_dispatchexecutions to run the productionpublishjob (pnpm publish --provenance) instead of staying build-only. That changes dry-run behavior into a real publish path, so an operator-triggered verification run can now either publish packages unexpectedly or fail during trusted-publishing validation, which is a release-blocking regression for this workflow.Useful? React with 👍 / 👎.