mpecan · mpecan · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/src/format/mod.rs b/src/format/mod.rs
@@ -333,9 +333,35 @@ fn format_list(items: &[ListItem], out: &mut String, indent: usize) {
         format_node(&item.command, out, indent);
     }
     // Write trailing operator on the last item (e.g., `cmd &`)
-    if let Some(op) = items.last().and_then(|last| last.operator) {
-        format_list_op(op, out);
+    if let Some(last) = items.last()
+        && let Some(op) = last.operator
+    {
+        if has_heredoc_redirect_deep(&last.command) {
+            insert_op_before_heredoc(op, out);
+        } else {
+            format_list_op(op, out);
+        }
+    }
+}
+
+/// Inserts a trailing operator (like `&`) on the delimiter line
+/// before the heredoc content, rather than after it.
+fn insert_op_before_heredoc(op: ListOperator, out: &mut String) {
+    // The output currently ends with: `<<delim\ncontent\ndelim\n`
+    // Find the first `\n` after the `<<` delimiter line to insert the
+    // operator there: `<<delim &\ncontent\ndelim\n`
+    // Strategy: find the position of the first \n after the last `<<`
+    if let Some(heredoc_pos) = out.rfind("<<")
+        && let Some(nl_pos) = out[heredoc_pos..].find('\n')
+    {
+        let insert_at = heredoc_pos + nl_pos;
+        let mut op_str = String::new();
+        format_list_op(op, &mut op_str);
+        out.insert_str(insert_at, op_str.trim_end());
+        return;
     }
+    // Fallback: just append
+    format_list_op(op, out);
 }
 
 fn format_list_op(op: ListOperator, out: &mut String) {

diff --git a/src/lexer/expansions.rs b/src/lexer/expansions.rs
@@ -46,7 +46,7 @@ impl Lexer {
             Some('[') => {
                 self.advance_char();
                 wb.push('[');
-                self.read_until_char(wb, ']')?;
+                self.read_deprecated_arith(wb)?;
                 wb.record(span_start, WordSpanKind::DeprecatedArith);
             }
             Some(c) if is_dollar_start(c) => {
@@ -168,6 +168,8 @@ impl Lexer {
                         wb.push('\\');
                         if let Some(c) = self.advance_char() {
                             wb.push(c);
+                        } else {
+                            wb.push('\\');
                         }
                     }
                 }
@@ -281,6 +283,8 @@ impl Lexer {
                         wb.push('\\');
                         if let Some(c) = self.advance_char() {
                             wb.push(c);
+                        } else {
+                            wb.push('\\');
                         }
                     }
                 }
@@ -345,18 +349,26 @@ impl Lexer {
         }
     }
 
-    /// Reads until the given closing character.
-    pub(super) fn read_until_char(&mut self, wb: &mut WordBuilder, close: char) -> Result<()> {
+    /// Reads deprecated `$[...]` arithmetic with bracket depth tracking.
+    fn read_deprecated_arith(&mut self, wb: &mut WordBuilder) -> Result<()> {
+        let mut depth = 1;
         loop {
             match self.advance_char() {
-                Some(c) if c == close => {
-                    wb.push(c);
-                    return Ok(());
+                Some('[') => {
+                    depth += 1;
+                    wb.push('[');
+                }
+                Some(']') => {
+                    depth -= 1;
+                    wb.push(']');
+                    if depth == 0 {
+                        return Ok(());
+                    }
                 }
                 Some(c) => wb.push(c),
                 None => {
                     return Err(RableError::matched_pair(
-                        format!("unterminated '{close}'"),
+                        "unterminated '$['",
                         self.pos,
                         self.line,
                     ));

diff --git a/src/lexer/heredoc.rs b/src/lexer/heredoc.rs
@@ -38,6 +38,7 @@ impl Lexer {
             // Read a line
             let mut line = String::new();
             let mut prev_backslash = false;
+            let mut eof_after_backslash = false;
             while let Some(c) = self.peek_char() {
                 self.advance_char();
                 if c == '\n' {
@@ -49,24 +50,37 @@ impl Lexer {
                     prev_backslash = false;
                     continue;
                 }
-                prev_backslash = c == '\\' && !prev_backslash;
-                line.push(c);
+                if c == '\\' && !prev_backslash && self.peek_char().is_none() {
+                    // Trailing \ at EOF — treat as literal \\
+                    line.push('\\');
+                    line.push('\\');
+                    prev_backslash = false;
+                    eof_after_backslash = true;
+                } else {
+                    prev_backslash = c == '\\' && !prev_backslash;
+                    line.push(c);
+                }
             }
             // Check if this line matches the delimiter
             let check_line = if strip_tabs {
                 line.trim_start_matches('\t')
             } else {
                 &line
             };
-            if check_line == delimiter {
+            // Match delimiter exactly, or with trailing whitespace
+            // (bash allows trailing spaces on the delimiter line)
+            if check_line == delimiter || check_line.trim_end() == delimiter {
                 break;
             }
             if strip_tabs {
                 content.push_str(line.trim_start_matches('\t'));
             } else {
                 content.push_str(&line);
             }
-            content.push('\n');
+            // Trailing \ at EOF consumes the implicit newline
+            if !eof_after_backslash {
+                content.push('\n');
+            }
         }
         content
     }

diff --git a/src/lexer/quotes.rs b/src/lexer/quotes.rs
@@ -81,6 +81,8 @@ impl Lexer {
                         wb.push('\\');
                         if let Some(next) = self.advance_char() {
                             wb.push(next);
+                        } else {
+                            wb.push('\\');
                         }
                     }
                 }

diff --git a/src/lexer/words.rs b/src/lexer/words.rs
@@ -129,6 +129,9 @@ impl Lexer {
                     wb.push('\\');
                     if let Some(next) = self.advance_char() {
                         wb.push(next);
+                    } else {
+                        // Trailing \ at EOF — bash keeps it as literal \\
+                        wb.push('\\');
                     }
                     wb.record(start, WordSpanKind::Escape);
                 }
@@ -364,13 +367,15 @@ fn is_assignment_word(value: &str) -> bool {
         match bytes[i] {
             b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' => i += 1,
             b'[' => {
-                // Skip subscript [...] (may be nested)
+                // Skip subscript [...] — reject if it contains whitespace
+                // (bash doesn't allow spaces in assignment subscripts)
                 i += 1;
                 let mut depth = 1;
                 while i < bytes.len() && depth > 0 {
                     match bytes[i] {
                         b'[' => depth += 1,
                         b']' => depth -= 1,
+                        b' ' | b'\t' | b'\n' => return false,
                         _ => {}
                     }
                     i += 1;

diff --git a/src/parser/compound.rs b/src/parser/compound.rs
@@ -498,6 +498,42 @@ impl Parser {
 
         let first_tok = self.lexer.next_token()?;
         self.lexer.set_command_start();
+
+        // If first token after coproc is a redirect operator, parse as
+        // a command with redirects (no name, no command word)
+        if matches!(
+            first_tok.kind,
+            TokenType::Less
+                | TokenType::Greater
+                | TokenType::DoubleGreater
+                | TokenType::LessAnd
+                | TokenType::GreaterAnd
+                | TokenType::LessGreater
+                | TokenType::GreaterPipe
+                | TokenType::AndGreater
+                | TokenType::AndDoubleGreater
+                | TokenType::DoubleLess
+                | TokenType::DoubleLessDash
+                | TokenType::TripleLess
+        ) {
+            let mut redirects = vec![self.build_redirect(first_tok, -1)?];
+            redirects.extend(self.parse_trailing_redirects()?);
+            return Ok(self.spanned(
+                start,
+                NodeKind::Coproc {
+                    name: None,
+                    command: Box::new(self.spanned(
+                        start,
+                        NodeKind::Command {
+                            assignments: Vec::new(),
+                            words: Vec::new(),
+                            redirects,
+                        },
+                    )),
+                },
+            ));
+        }
+
         let next = self.lexer.peek_token()?;
         let name = if next.kind.starts_command()
             && !matches!(

diff --git a/src/parser/helpers.rs b/src/parser/helpers.rs
@@ -36,15 +36,19 @@ pub(super) fn is_fd_number(s: &str) -> bool {
 }
 
 /// Returns true if the string is a variable fd reference like `{varname}`.
+/// Requires valid bash variable name: starts with letter or `_`, then
+/// alphanumeric or `_`.
 pub(super) fn is_varfd(s: &str) -> bool {
     s.starts_with('{')
         && s.ends_with('}')
         && s.len() >= 3
+        // First char must be letter or underscore (valid variable name start)
+        && s.as_bytes()
+            .get(1)
+            .is_some_and(|&c| c.is_ascii_alphabetic() || c == b'_')
         && s[1..s.len() - 1]
             .chars()
             .all(|c| c.is_ascii_alphanumeric() || c == '_')
-        // Must contain at least one letter (not just digits — {4} is not a varfd)
-        && s[1..s.len() - 1].chars().any(|c| c.is_ascii_alphabetic() || c == '_')
 }
 
 /// Returns true if the string is a conditional binary operator.

diff --git a/src/sexp/mod.rs b/src/sexp/mod.rs
@@ -370,6 +370,11 @@ pub(crate) fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String
                         // High bytes are invalid standalone UTF-8 — replacement char
                         out.push('\u{FFFD}');
                     } else if let Some(ch) = char::from_u32(hex) {
+                        // Bash prefixes CTLESC (0x01) and CTLNUL (0x7F) with
+                        // CTLESC in its internal representation
+                        if ch == '\x01' || ch == '\x7F' {
+                            out.push('\x01');
+                        }
                         out.push(ch);
                     }
                 }
@@ -440,6 +445,9 @@ pub(crate) fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String
                         return out;
                     }
                     if let Some(ch) = char::from_u32(val) {
+                        if ch == '\x01' || ch == '\x7F' {
+                            out.push('\x01');
+                        }
                         out.push(ch);
                     }
                 }

diff --git a/tests/integration.rs b/tests/integration.rs
@@ -203,26 +203,12 @@ parable_tests! {
 /// When a fix makes one of these pass, the test suite will fail with
 /// "NEWLY PASSING" so you know to remove it from this list.
 const KNOWN_ORACLE_FAILURES: &[&str] = &[
-    // Trailing backslash doubling
-    "ansi_c_escapes 3",
-    "redirect_formatting 3",
-    "heredoc_formatting 1",
-    // ANSI-C \x single hex digit and \0 octal repeat behavior
-    "ansi_c_escapes 13",
-    "other 10",
-    // Heredoc delimiter edge cases
-    "ansi_c_escapes 18",
-    "heredoc_formatting 8",
-    // Varfd {6d} not recognized → word dropped
-    "heredoc_formatting 9",
-    // Coproc with adjacent redirect
-    "redirect_formatting 7",
-    // Background & placement after heredoc in cmdsub
+    // Cosmetic: bash adds a space before ) in $(cmd <<heredoc &\n...\n )
+    // but we produce $(cmd <<heredoc &\n...\n). The space is semantically
+    // irrelevant — $(cmd ) and $(cmd) are identical in bash. The space
+    // is an artifact of bash's internal parser boundary between heredoc
+    // content and the $(...) close delimiter.
     "cmdsub_formatting 9",
-    // Deprecated $[...] with ; splits word
-    "word_boundaries 8",
-    // Assignment detection causes esac to be keyword
-    "word_boundaries 2",
 ];
 
 #[derive(Default)]