diff --git a/src/format/mod.rs b/src/format/mod.rs index e1998fa..360701e 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -333,9 +333,35 @@ fn format_list(items: &[ListItem], out: &mut String, indent: usize) { format_node(&item.command, out, indent); } // Write trailing operator on the last item (e.g., `cmd &`) - if let Some(op) = items.last().and_then(|last| last.operator) { - format_list_op(op, out); + if let Some(last) = items.last() + && let Some(op) = last.operator + { + if has_heredoc_redirect_deep(&last.command) { + insert_op_before_heredoc(op, out); + } else { + format_list_op(op, out); + } + } +} + +/// Inserts a trailing operator (like `&`) on the delimiter line +/// before the heredoc content, rather than after it. +fn insert_op_before_heredoc(op: ListOperator, out: &mut String) { + // The output currently ends with: `< { self.advance_char(); wb.push('['); - self.read_until_char(wb, ']')?; + self.read_deprecated_arith(wb)?; wb.record(span_start, WordSpanKind::DeprecatedArith); } Some(c) if is_dollar_start(c) => { @@ -168,6 +168,8 @@ impl Lexer { wb.push('\\'); if let Some(c) = self.advance_char() { wb.push(c); + } else { + wb.push('\\'); } } } @@ -281,6 +283,8 @@ impl Lexer { wb.push('\\'); if let Some(c) = self.advance_char() { wb.push(c); + } else { + wb.push('\\'); } } } @@ -345,18 +349,26 @@ impl Lexer { } } - /// Reads until the given closing character. - pub(super) fn read_until_char(&mut self, wb: &mut WordBuilder, close: char) -> Result<()> { + /// Reads deprecated `$[...]` arithmetic with bracket depth tracking. + fn read_deprecated_arith(&mut self, wb: &mut WordBuilder) -> Result<()> { + let mut depth = 1; loop { match self.advance_char() { - Some(c) if c == close => { - wb.push(c); - return Ok(()); + Some('[') => { + depth += 1; + wb.push('['); + } + Some(']') => { + depth -= 1; + wb.push(']'); + if depth == 0 { + return Ok(()); + } } Some(c) => wb.push(c), None => { return Err(RableError::matched_pair( - format!("unterminated '{close}'"), + "unterminated '$['", self.pos, self.line, )); diff --git a/src/lexer/heredoc.rs b/src/lexer/heredoc.rs index 36e2882..05fa4af 100644 --- a/src/lexer/heredoc.rs +++ b/src/lexer/heredoc.rs @@ -38,6 +38,7 @@ impl Lexer { // Read a line let mut line = String::new(); let mut prev_backslash = false; + let mut eof_after_backslash = false; while let Some(c) = self.peek_char() { self.advance_char(); if c == '\n' { @@ -49,8 +50,16 @@ impl Lexer { prev_backslash = false; continue; } - prev_backslash = c == '\\' && !prev_backslash; - line.push(c); + if c == '\\' && !prev_backslash && self.peek_char().is_none() { + // Trailing \ at EOF — treat as literal \\ + line.push('\\'); + line.push('\\'); + prev_backslash = false; + eof_after_backslash = true; + } else { + prev_backslash = c == '\\' && !prev_backslash; + line.push(c); + } } // Check if this line matches the delimiter let check_line = if strip_tabs { @@ -58,7 +67,9 @@ impl Lexer { } else { &line }; - if check_line == delimiter { + // Match delimiter exactly, or with trailing whitespace + // (bash allows trailing spaces on the delimiter line) + if check_line == delimiter || check_line.trim_end() == delimiter { break; } if strip_tabs { @@ -66,7 +77,10 @@ impl Lexer { } else { content.push_str(&line); } - content.push('\n'); + // Trailing \ at EOF consumes the implicit newline + if !eof_after_backslash { + content.push('\n'); + } } content } diff --git a/src/lexer/quotes.rs b/src/lexer/quotes.rs index 6adf639..b449c52 100644 --- a/src/lexer/quotes.rs +++ b/src/lexer/quotes.rs @@ -81,6 +81,8 @@ impl Lexer { wb.push('\\'); if let Some(next) = self.advance_char() { wb.push(next); + } else { + wb.push('\\'); } } } diff --git a/src/lexer/words.rs b/src/lexer/words.rs index 8a4a981..0bfb3e1 100644 --- a/src/lexer/words.rs +++ b/src/lexer/words.rs @@ -129,6 +129,9 @@ impl Lexer { wb.push('\\'); if let Some(next) = self.advance_char() { wb.push(next); + } else { + // Trailing \ at EOF — bash keeps it as literal \\ + wb.push('\\'); } wb.record(start, WordSpanKind::Escape); } @@ -364,13 +367,15 @@ fn is_assignment_word(value: &str) -> bool { match bytes[i] { b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' => i += 1, b'[' => { - // Skip subscript [...] (may be nested) + // Skip subscript [...] — reject if it contains whitespace + // (bash doesn't allow spaces in assignment subscripts) i += 1; let mut depth = 1; while i < bytes.len() && depth > 0 { match bytes[i] { b'[' => depth += 1, b']' => depth -= 1, + b' ' | b'\t' | b'\n' => return false, _ => {} } i += 1; diff --git a/src/parser/compound.rs b/src/parser/compound.rs index baf4ce7..a99e8a1 100644 --- a/src/parser/compound.rs +++ b/src/parser/compound.rs @@ -498,6 +498,42 @@ impl Parser { let first_tok = self.lexer.next_token()?; self.lexer.set_command_start(); + + // If first token after coproc is a redirect operator, parse as + // a command with redirects (no name, no command word) + if matches!( + first_tok.kind, + TokenType::Less + | TokenType::Greater + | TokenType::DoubleGreater + | TokenType::LessAnd + | TokenType::GreaterAnd + | TokenType::LessGreater + | TokenType::GreaterPipe + | TokenType::AndGreater + | TokenType::AndDoubleGreater + | TokenType::DoubleLess + | TokenType::DoubleLessDash + | TokenType::TripleLess + ) { + let mut redirects = vec![self.build_redirect(first_tok, -1)?]; + redirects.extend(self.parse_trailing_redirects()?); + return Ok(self.spanned( + start, + NodeKind::Coproc { + name: None, + command: Box::new(self.spanned( + start, + NodeKind::Command { + assignments: Vec::new(), + words: Vec::new(), + redirects, + }, + )), + }, + )); + } + let next = self.lexer.peek_token()?; let name = if next.kind.starts_command() && !matches!( diff --git a/src/parser/helpers.rs b/src/parser/helpers.rs index 72e42dd..5ecc0fd 100644 --- a/src/parser/helpers.rs +++ b/src/parser/helpers.rs @@ -36,15 +36,19 @@ pub(super) fn is_fd_number(s: &str) -> bool { } /// Returns true if the string is a variable fd reference like `{varname}`. +/// Requires valid bash variable name: starts with letter or `_`, then +/// alphanumeric or `_`. pub(super) fn is_varfd(s: &str) -> bool { s.starts_with('{') && s.ends_with('}') && s.len() >= 3 + // First char must be letter or underscore (valid variable name start) + && s.as_bytes() + .get(1) + .is_some_and(|&c| c.is_ascii_alphabetic() || c == b'_') && s[1..s.len() - 1] .chars() .all(|c| c.is_ascii_alphanumeric() || c == '_') - // Must contain at least one letter (not just digits — {4} is not a varfd) - && s[1..s.len() - 1].chars().any(|c| c.is_ascii_alphabetic() || c == '_') } /// Returns true if the string is a conditional binary operator. diff --git a/src/sexp/mod.rs b/src/sexp/mod.rs index b41a637..5e8f785 100644 --- a/src/sexp/mod.rs +++ b/src/sexp/mod.rs @@ -370,6 +370,11 @@ pub(crate) fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String // High bytes are invalid standalone UTF-8 — replacement char out.push('\u{FFFD}'); } else if let Some(ch) = char::from_u32(hex) { + // Bash prefixes CTLESC (0x01) and CTLNUL (0x7F) with + // CTLESC in its internal representation + if ch == '\x01' || ch == '\x7F' { + out.push('\x01'); + } out.push(ch); } } @@ -440,6 +445,9 @@ pub(crate) fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String return out; } if let Some(ch) = char::from_u32(val) { + if ch == '\x01' || ch == '\x7F' { + out.push('\x01'); + } out.push(ch); } } diff --git a/tests/integration.rs b/tests/integration.rs index 37ad57d..ab5da13 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -203,26 +203,12 @@ parable_tests! { /// When a fix makes one of these pass, the test suite will fail with /// "NEWLY PASSING" so you know to remove it from this list. const KNOWN_ORACLE_FAILURES: &[&str] = &[ - // Trailing backslash doubling - "ansi_c_escapes 3", - "redirect_formatting 3", - "heredoc_formatting 1", - // ANSI-C \x single hex digit and \0 octal repeat behavior - "ansi_c_escapes 13", - "other 10", - // Heredoc delimiter edge cases - "ansi_c_escapes 18", - "heredoc_formatting 8", - // Varfd {6d} not recognized → word dropped - "heredoc_formatting 9", - // Coproc with adjacent redirect - "redirect_formatting 7", - // Background & placement after heredoc in cmdsub + // Cosmetic: bash adds a space before ) in $(cmd <