From 36041c539de310574039655ca76b0028bc31bc19 Mon Sep 17 00:00:00 2001 From: Shawn Scott Date: Thu, 26 Mar 2026 14:52:17 -0700 Subject: [PATCH] fix: grep false negatives, output mangling, and truncation annotations - grep: use --no-ignore-vcs so .gitignore'd files aren't silently skipped (matches grep -r behavior, avoids false negatives in large monorepos) - grep: passthrough raw output for <=50 matches so AI agents can parse standard file:line:content format without retry loops - filter: replace smart_truncate heuristic with clean first-N-lines truncation and a single [X more lines] suffix (eliminates synthetic // ... markers that AI agents misread as code, causing parsing confusion and retries) --- src/cmds/system/grep_cmd.rs | 159 ++++++++++++++++++++++++++---------- src/core/filter.rs | 87 ++++++++++---------- 2 files changed, 157 insertions(+), 89 deletions(-) diff --git a/src/cmds/system/grep_cmd.rs b/src/cmds/system/grep_cmd.rs index 4550e877..2c9a251f 100644 --- a/src/cmds/system/grep_cmd.rs +++ b/src/cmds/system/grep_cmd.rs @@ -28,7 +28,11 @@ pub fn run( let rg_pattern = pattern.replace(r"\|", "|"); let mut rg_cmd = resolved_command("rg"); - rg_cmd.args(["-n", "--no-heading", &rg_pattern, path]); + // --no-ignore-vcs: match grep -r behavior (don't skip .gitignore'd files). + // Without this, rg returns 0 matches for files in .gitignore, causing + // false negatives that make AI agents draw wrong conclusions. + // Using --no-ignore-vcs (not --no-ignore) so .ignore/.rgignore are still respected. + rg_cmd.args(["-n", "--no-heading", "--no-ignore-vcs", &rg_pattern, path]); if let Some(ft) = file_type { rg_cmd.arg("--type").arg(ft); @@ -78,67 +82,112 @@ pub fn run( return Ok(()); } - let mut by_file: HashMap> = HashMap::new(); - let mut total = 0; - - // Compile context regex once (instead of per-line in clean_line) - let context_re = if context_only { - Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok() + // Count total matches to decide output strategy + let total_matches = stdout.lines().count(); + + // Passthrough threshold: small results pass through raw so AI agents + // can parse standard grep output. Only use grouped format for large results + // where token savings are meaningful. The grouped format confuses AI agents + // on small result sets, causing retry loops that burn more tokens than saved. + let passthrough_threshold = 50; + + let rtk_output = if total_matches <= passthrough_threshold { + // Small result set: pass through raw rg/grep output. + // Truncate individual lines but preserve standard file:line:content format. + let mut out = String::new(); + let mut shown = 0; + for line in stdout.lines() { + if shown >= max_results { + break; + } + let parts: Vec<&str> = line.splitn(3, ':').collect(); + if parts.len() == 3 { + let file = parts[0]; + let line_num = parts[1]; + let content = parts[2].trim(); + if content.len() <= max_line_len { + out.push_str(&format!("{}:{}:{}\n", file, line_num, content)); + } else { + let truncated: String = content.chars().take(max_line_len - 3).collect(); + out.push_str(&format!("{}:{}:{}...\n", file, line_num, truncated)); + } + } else { + // Non-standard line (e.g., context separator), pass through + out.push_str(line); + out.push('\n'); + } + shown += 1; + } + if total_matches > max_results { + out.push_str(&format!("... +{} matches\n", total_matches - max_results)); + } + out } else { - None - }; + // Large result set: use grouped format for token savings + let mut by_file: HashMap> = HashMap::new(); - for line in stdout.lines() { - let parts: Vec<&str> = line.splitn(3, ':').collect(); - - let (file, line_num, content) = if parts.len() == 3 { - let ln = parts[1].parse().unwrap_or(0); - (parts[0].to_string(), ln, parts[2]) - } else if parts.len() == 2 { - let ln = parts[0].parse().unwrap_or(0); - (path.to_string(), ln, parts[1]) + let context_re = if context_only { + Regex::new(&format!("(?i).{{0,20}}{}.*", regex::escape(pattern))).ok() } else { - continue; + None }; - total += 1; - let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern); - by_file.entry(file).or_default().push((line_num, cleaned)); - } + for line in stdout.lines() { + let parts: Vec<&str> = line.splitn(3, ':').collect(); - let mut rtk_output = String::new(); - rtk_output.push_str(&format!("{} matches in {}F:\n\n", total, by_file.len())); - - let mut shown = 0; - let mut files: Vec<_> = by_file.iter().collect(); - files.sort_by_key(|(f, _)| *f); + let (file, line_num, content) = if parts.len() == 3 { + let ln = parts[1].parse().unwrap_or(0); + (parts[0].to_string(), ln, parts[2]) + } else if parts.len() == 2 { + let ln = parts[0].parse().unwrap_or(0); + (path.to_string(), ln, parts[1]) + } else { + continue; + }; - for (file, matches) in files { - if shown >= max_results { - break; + let cleaned = clean_line(content, max_line_len, context_re.as_ref(), pattern); + by_file.entry(file).or_default().push((line_num, cleaned)); } - let file_display = compact_path(file); - rtk_output.push_str(&format!("[file] {} ({}):\n", file_display, matches.len())); + let mut out = String::new(); + out.push_str(&format!( + "{} matches in {} files:\n\n", + total_matches, + by_file.len() + )); + + let mut shown = 0; + let mut files: Vec<_> = by_file.iter().collect(); + files.sort_by_key(|(f, _)| *f); let per_file = config::limits().grep_max_per_file; - for (line_num, content) in matches.iter().take(per_file) { - rtk_output.push_str(&format!(" {:>4}: {}\n", line_num, content)); - shown += 1; + for (file, matches) in files { if shown >= max_results { break; } - } - if matches.len() > per_file { - rtk_output.push_str(&format!(" +{}\n", matches.len() - per_file)); + let file_display = compact_path(file); + out.push_str(&format!("[file] {} ({}):\n", file_display, matches.len())); + + for (line_num, content) in matches.iter().take(per_file) { + out.push_str(&format!(" {:>4}: {}\n", line_num, content)); + shown += 1; + if shown >= max_results { + break; + } + } + + if matches.len() > per_file { + out.push_str(&format!(" +{}\n", matches.len() - per_file)); + } + out.push('\n'); } - rtk_output.push('\n'); - } - if total > shown { - rtk_output.push_str(&format!("... +{}\n", total - shown)); - } + if total_matches > shown { + out.push_str(&format!("... +{}\n", total_matches - shown)); + } + out + }; print!("{}", rtk_output); timer.track( @@ -320,4 +369,24 @@ mod tests { } // If rg is not installed, skip gracefully (test still passes) } + + #[test] + fn test_rg_no_ignore_vcs_flag_accepted() { + // Verify rg accepts --no-ignore-vcs (used to match grep -r behavior for .gitignore) + let mut cmd = resolved_command("rg"); + cmd.args([ + "-n", + "--no-heading", + "--no-ignore-vcs", + "NONEXISTENT_PATTERN_12345", + ".", + ]); + if let Ok(output) = cmd.output() { + assert!( + output.status.code() == Some(1) || output.status.success(), + "rg --no-ignore-vcs should be accepted" + ); + } + // If rg is not installed, skip gracefully (test still passes) + } } diff --git a/src/core/filter.rs b/src/core/filter.rs index 1fc5a23f..9338827c 100644 --- a/src/core/filter.rs +++ b/src/core/filter.rs @@ -340,47 +340,16 @@ pub fn smart_truncate(content: &str, max_lines: usize, _lang: &Language) -> Stri return content.to_string(); } - let mut result = Vec::with_capacity(max_lines); - let mut kept_lines = 0; - let mut skipped_section = false; - - for line in &lines { - let trimmed = line.trim(); - - // Always keep signatures and important structural elements - let is_important = FUNC_SIGNATURE.is_match(trimmed) - || IMPORT_PATTERN.is_match(trimmed) - || trimmed.starts_with("pub ") - || trimmed.starts_with("export ") - || trimmed == "}" - || trimmed == "{"; - - if is_important || kept_lines < max_lines / 2 { - if skipped_section { - result.push(format!( - " // ... {} lines omitted", - lines.len() - kept_lines - )); - skipped_section = false; - } - result.push((*line).to_string()); - kept_lines += 1; - } else { - skipped_section = true; - } - - if kept_lines >= max_lines - 1 { - break; - } - } - - if skipped_section || kept_lines < lines.len() { - result.push(format!( - "// ... {} more lines (total: {})", - lines.len() - kept_lines, - lines.len() - )); - } + // Clean truncation: take first max_lines lines only. + // The old approach inserted synthetic "// ... N lines omitted" markers + // that AI agents treated as file content, causing parsing confusion + // and retry loops that burned more tokens than the filtering saved. + let mut result: Vec = lines[..max_lines] + .iter() + .map(|l| (*l).to_string()) + .collect(); + let omitted = lines.len() - max_lines; + result.push(format!("[{} more lines]", omitted)); result.join("\n") } @@ -499,9 +468,9 @@ fn main() { #[test] fn test_smart_truncate_overflow_count_exact() { // 200 plain-text lines with max_lines=20. - // smart_truncate keeps the first max_lines/2=10 lines, then skips the rest. - // The overflow message "// ... N more lines (total: T)" must satisfy: - // kept_count + N == T + // smart_truncate keeps the first max_lines=20 lines. + // The overflow message "[N more lines]" must satisfy: + // kept_count + N == total_lines let total_lines = 200usize; let max_lines = 20usize; let content: String = (0..total_lines) @@ -538,4 +507,34 @@ fn main() { total_lines ); } + + #[test] + fn test_smart_truncate_no_annotations() { + let input = "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\n"; + let output = smart_truncate(input, 3, &Language::Unknown); + // Must NOT contain old-style "// ... N lines omitted" annotations + assert!( + !output.contains("// ..."), + "smart_truncate must not insert synthetic comment annotations" + ); + // Must contain clean truncation marker + assert!(output.contains("[7 more lines]")); + // Must preserve first 3 lines verbatim + assert!(output.starts_with("line1\nline2\nline3\n")); + } + + #[test] + fn test_smart_truncate_no_truncation_when_under_limit() { + let input = "a\nb\nc\n"; + let output = smart_truncate(input, 10, &Language::Unknown); + assert_eq!(output, input); + assert!(!output.contains("more lines")); + } + + #[test] + fn test_smart_truncate_exact_limit() { + let input = "a\nb\nc"; + let output = smart_truncate(input, 3, &Language::Unknown); + assert_eq!(output, input); + } }