Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions src/format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,35 @@ fn format_list(items: &[ListItem], out: &mut String, indent: usize) {
format_node(&item.command, out, indent);
}
// Write trailing operator on the last item (e.g., `cmd &`)
if let Some(op) = items.last().and_then(|last| last.operator) {
format_list_op(op, out);
if let Some(last) = items.last()
&& let Some(op) = last.operator
{
if has_heredoc_redirect_deep(&last.command) {
insert_op_before_heredoc(op, out);
} else {
format_list_op(op, out);
}
}
}

/// Inserts a trailing operator (like `&`) on the delimiter line
/// before the heredoc content, rather than after it.
fn insert_op_before_heredoc(op: ListOperator, out: &mut String) {
// The output currently ends with: `<<delim\ncontent\ndelim\n`
// Find the first `\n` after the `<<` delimiter line to insert the
// operator there: `<<delim &\ncontent\ndelim\n`
// Strategy: find the position of the first \n after the last `<<`
if let Some(heredoc_pos) = out.rfind("<<")
&& let Some(nl_pos) = out[heredoc_pos..].find('\n')
{
let insert_at = heredoc_pos + nl_pos;
let mut op_str = String::new();
format_list_op(op, &mut op_str);
out.insert_str(insert_at, op_str.trim_end());
return;
}
// Fallback: just append
format_list_op(op, out);
}

fn format_list_op(op: ListOperator, out: &mut String) {
Expand Down
26 changes: 19 additions & 7 deletions src/lexer/expansions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl Lexer {
Some('[') => {
self.advance_char();
wb.push('[');
self.read_until_char(wb, ']')?;
self.read_deprecated_arith(wb)?;
wb.record(span_start, WordSpanKind::DeprecatedArith);
}
Some(c) if is_dollar_start(c) => {
Expand Down Expand Up @@ -168,6 +168,8 @@ impl Lexer {
wb.push('\\');
if let Some(c) = self.advance_char() {
wb.push(c);
} else {
wb.push('\\');
}
}
}
Expand Down Expand Up @@ -281,6 +283,8 @@ impl Lexer {
wb.push('\\');
if let Some(c) = self.advance_char() {
wb.push(c);
} else {
wb.push('\\');
}
}
}
Expand Down Expand Up @@ -345,18 +349,26 @@ impl Lexer {
}
}

/// Reads until the given closing character.
pub(super) fn read_until_char(&mut self, wb: &mut WordBuilder, close: char) -> Result<()> {
/// Reads deprecated `$[...]` arithmetic with bracket depth tracking.
fn read_deprecated_arith(&mut self, wb: &mut WordBuilder) -> Result<()> {
let mut depth = 1;
loop {
match self.advance_char() {
Some(c) if c == close => {
wb.push(c);
return Ok(());
Some('[') => {
depth += 1;
wb.push('[');
}
Some(']') => {
depth -= 1;
wb.push(']');
if depth == 0 {
return Ok(());
}
}
Some(c) => wb.push(c),
None => {
return Err(RableError::matched_pair(
format!("unterminated '{close}'"),
"unterminated '$['",
self.pos,
self.line,
));
Expand Down
22 changes: 18 additions & 4 deletions src/lexer/heredoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ impl Lexer {
// Read a line
let mut line = String::new();
let mut prev_backslash = false;
let mut eof_after_backslash = false;
while let Some(c) = self.peek_char() {
self.advance_char();
if c == '\n' {
Expand All @@ -49,24 +50,37 @@ impl Lexer {
prev_backslash = false;
continue;
}
prev_backslash = c == '\\' && !prev_backslash;
line.push(c);
if c == '\\' && !prev_backslash && self.peek_char().is_none() {
// Trailing \ at EOF — treat as literal \\
line.push('\\');
line.push('\\');
prev_backslash = false;
eof_after_backslash = true;
} else {
prev_backslash = c == '\\' && !prev_backslash;
line.push(c);
}
}
// Check if this line matches the delimiter
let check_line = if strip_tabs {
line.trim_start_matches('\t')
} else {
&line
};
if check_line == delimiter {
// Match delimiter exactly, or with trailing whitespace
// (bash allows trailing spaces on the delimiter line)
if check_line == delimiter || check_line.trim_end() == delimiter {
break;
}
if strip_tabs {
content.push_str(line.trim_start_matches('\t'));
} else {
content.push_str(&line);
}
content.push('\n');
// Trailing \ at EOF consumes the implicit newline
if !eof_after_backslash {
content.push('\n');
}
}
content
}
Expand Down
2 changes: 2 additions & 0 deletions src/lexer/quotes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ impl Lexer {
wb.push('\\');
if let Some(next) = self.advance_char() {
wb.push(next);
} else {
wb.push('\\');
}
}
}
Expand Down
7 changes: 6 additions & 1 deletion src/lexer/words.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ impl Lexer {
wb.push('\\');
if let Some(next) = self.advance_char() {
wb.push(next);
} else {
// Trailing \ at EOF — bash keeps it as literal \\
wb.push('\\');
}
wb.record(start, WordSpanKind::Escape);
}
Expand Down Expand Up @@ -364,13 +367,15 @@ fn is_assignment_word(value: &str) -> bool {
match bytes[i] {
b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' => i += 1,
b'[' => {
// Skip subscript [...] (may be nested)
// Skip subscript [...] — reject if it contains whitespace
// (bash doesn't allow spaces in assignment subscripts)
i += 1;
let mut depth = 1;
while i < bytes.len() && depth > 0 {
match bytes[i] {
b'[' => depth += 1,
b']' => depth -= 1,
b' ' | b'\t' | b'\n' => return false,
_ => {}
}
i += 1;
Expand Down
36 changes: 36 additions & 0 deletions src/parser/compound.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,42 @@ impl Parser {

let first_tok = self.lexer.next_token()?;
self.lexer.set_command_start();

// If first token after coproc is a redirect operator, parse as
// a command with redirects (no name, no command word)
if matches!(
first_tok.kind,
TokenType::Less
| TokenType::Greater
| TokenType::DoubleGreater
| TokenType::LessAnd
| TokenType::GreaterAnd
| TokenType::LessGreater
| TokenType::GreaterPipe
| TokenType::AndGreater
| TokenType::AndDoubleGreater
| TokenType::DoubleLess
| TokenType::DoubleLessDash
| TokenType::TripleLess
) {
let mut redirects = vec![self.build_redirect(first_tok, -1)?];
redirects.extend(self.parse_trailing_redirects()?);
return Ok(self.spanned(
start,
NodeKind::Coproc {
name: None,
command: Box::new(self.spanned(
start,
NodeKind::Command {
assignments: Vec::new(),
words: Vec::new(),
redirects,
},
)),
},
));
}

let next = self.lexer.peek_token()?;
let name = if next.kind.starts_command()
&& !matches!(
Expand Down
8 changes: 6 additions & 2 deletions src/parser/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,19 @@ pub(super) fn is_fd_number(s: &str) -> bool {
}

/// Returns true if the string is a variable fd reference like `{varname}`.
/// Requires valid bash variable name: starts with letter or `_`, then
/// alphanumeric or `_`.
pub(super) fn is_varfd(s: &str) -> bool {
s.starts_with('{')
&& s.ends_with('}')
&& s.len() >= 3
// First char must be letter or underscore (valid variable name start)
&& s.as_bytes()
.get(1)
.is_some_and(|&c| c.is_ascii_alphabetic() || c == b'_')
&& s[1..s.len() - 1]
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '_')
// Must contain at least one letter (not just digits — {4} is not a varfd)
&& s[1..s.len() - 1].chars().any(|c| c.is_ascii_alphabetic() || c == '_')
}

/// Returns true if the string is a conditional binary operator.
Expand Down
8 changes: 8 additions & 0 deletions src/sexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,11 @@ pub(crate) fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String
// High bytes are invalid standalone UTF-8 — replacement char
out.push('\u{FFFD}');
} else if let Some(ch) = char::from_u32(hex) {
// Bash prefixes CTLESC (0x01) and CTLNUL (0x7F) with
// CTLESC in its internal representation
if ch == '\x01' || ch == '\x7F' {
out.push('\x01');
}
out.push(ch);
}
}
Expand Down Expand Up @@ -440,6 +445,9 @@ pub(crate) fn process_ansi_c_content(chars: &[char], pos: &mut usize) -> String
return out;
}
if let Some(ch) = char::from_u32(val) {
if ch == '\x01' || ch == '\x7F' {
out.push('\x01');
}
out.push(ch);
}
}
Expand Down
24 changes: 5 additions & 19 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,26 +203,12 @@ parable_tests! {
/// When a fix makes one of these pass, the test suite will fail with
/// "NEWLY PASSING" so you know to remove it from this list.
const KNOWN_ORACLE_FAILURES: &[&str] = &[
// Trailing backslash doubling
"ansi_c_escapes 3",
"redirect_formatting 3",
"heredoc_formatting 1",
// ANSI-C \x single hex digit and \0 octal repeat behavior
"ansi_c_escapes 13",
"other 10",
// Heredoc delimiter edge cases
"ansi_c_escapes 18",
"heredoc_formatting 8",
// Varfd {6d} not recognized → word dropped
"heredoc_formatting 9",
// Coproc with adjacent redirect
"redirect_formatting 7",
// Background & placement after heredoc in cmdsub
// Cosmetic: bash adds a space before ) in $(cmd <<heredoc &\n...\n )
// but we produce $(cmd <<heredoc &\n...\n). The space is semantically
// irrelevant — $(cmd ) and $(cmd) are identical in bash. The space
// is an artifact of bash's internal parser boundary between heredoc
// content and the $(...) close delimiter.
"cmdsub_formatting 9",
// Deprecated $[...] with ; splits word
"word_boundaries 8",
// Assignment detection causes esac to be keyword
"word_boundaries 2",
];

#[derive(Default)]
Expand Down
Loading