diff --git a/CHANGELOG.md b/CHANGELOG.md index e9f46c18..c9abcf29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,45 @@ # Development version +- Autobracing is a new feature applied to if statements, for loops, while loops, + repeat loops, and function definitions. This feature will automatically add + `{}` around the body of these code elements in certain cases to maximize + readability, consistency, and portability (#225, #334). + + For example: + + ```r + if (condition) + a + + # Becomes: + if (condition) { + a + } + ``` + + ```r + fn <- function( + a, b + ) a + b + + # Becomes: + fn <- function( + a, + b + ) { + a + b + } + ``` + + Single line if statements and function definitions are still allowed in certain contexts: + + ```r + list(a = if (is.null(x)) NA else x) + + map(xs, function(x) x + 1) + ``` + # 0.5.0 diff --git a/crates/air_r_formatter/src/comments.rs b/crates/air_r_formatter/src/comments.rs index 7f07b2d5..0955580c 100644 --- a/crates/air_r_formatter/src/comments.rs +++ b/crates/air_r_formatter/src/comments.rs @@ -3,9 +3,12 @@ use air_r_syntax::AnyRExpression; use air_r_syntax::RArgument; use air_r_syntax::RArgumentNameClause; use air_r_syntax::RElseClause; +use air_r_syntax::RForStatement; +use air_r_syntax::RFunctionDefinition; use air_r_syntax::RIfStatement; use air_r_syntax::RLanguage; use air_r_syntax::RParenthesizedExpression; +use air_r_syntax::RRepeatStatement; use air_r_syntax::RSyntaxKind; use air_r_syntax::RSyntaxToken; use air_r_syntax::RWhileStatement; @@ -17,6 +20,7 @@ use biome_formatter::comments::Comments; use biome_formatter::comments::DecoratedComment; use biome_formatter::comments::SourceComment; use biome_formatter::write; +use biome_rowan::AstNode; use biome_rowan::SyntaxTriviaPieceComments; use comments::Directive; use comments::FormatDirective; @@ -90,105 +94,302 @@ impl CommentStyle for RCommentStyle { } fn handle_for_comment(comment: DecoratedComment) -> CommentPlacement { - let enclosing = comment.enclosing_node(); - - if enclosing.kind() != RSyntaxKind::R_FOR_STATEMENT { + let Some(for_statement) = RForStatement::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); - } + }; - if comment.text_position().is_own_line() { - // Lift comment up as a leading comment on the whole `R_FOR_STATEMENT` node - return CommentPlacement::leading(enclosing.clone(), comment); + // If the following node is the `body`, we want to lead the first element of the body. + // But we only want to do this if we are past the `)` of the loop sequence, so we have + // to check that the following token isn't `)` too. + // + // + // ```r + // for (i in 1:5) # dangles {} + // { + // } + // ``` + // + // ```r + // for (i in 1:5) # leads `a` + // { + // a + // } + // ``` + // + // Particularly important for prepping for autobracing here + // + // ```r + // for (i in 1:5) # leads `a` + // a + // ``` + // + // This is consistent to when there are already braces there. + // + // ```r + // for (i in 1:5) { # leads `a` + // a + // } + // ``` + // + // Here the following node is the `body`, but the following token is `)`. We want + // to avoid stealing this comment. + // + // ```r + // for (i in 1:5 # comment + // ) { + // } + // ``` + if let Ok(body) = for_statement.body() { + if let Some(following) = comment.following_node() { + if let Some(following_token) = comment.following_token() { + if body.syntax() == following && following_token.kind() != RSyntaxKind::R_PAREN { + return place_leading_or_dangling_body_comment(body, comment); + } + } + } } - CommentPlacement::Default(comment) + // Otherwise if the comment is "enclosed" by the `for_statement` then it can only be + // in one of a few places, none of which are particularly meaningful, so we move the + // comment to lead the whole `for_statement` + // + // ```r + // for # comment + // (i in 1:5) { + // } + // ``` + // + // ```r + // for ( # comment + // i in 1:5) { + // } + // ``` + // + // ```r + // for (i # comment + // in 1:5) { + // } + // ``` + // + // ```r + // for (i in # comment + // 1:5) { + // } + // ``` + // + // ```r + // for (i in + // # comment + // 1:5) { + // } + // ``` + // + // ```r + // for (i in 1:5 # comment + // ) { + // } + // ``` + CommentPlacement::leading(for_statement.into_syntax(), comment) } fn handle_while_comment(comment: DecoratedComment) -> CommentPlacement { - let Some(enclosing) = RWhileStatement::cast_ref(comment.enclosing_node()) else { + let Some(while_statement) = RWhileStatement::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); }; - if let Some(preceding) = comment.preceding_node() { - // Make comments directly before the condition `)` trailing - // comments of the condition itself (rather than leading comments of - // the `body` node) - // - // ```r - // while ( - // cond - // # comment - // ) { - // } - // ``` - if comment - .following_token() - .is_some_and(|token| token.kind() == RSyntaxKind::R_PAREN) - { - return CommentPlacement::trailing(preceding.clone(), comment); + // If the following node is the `body`, we want to lead the first element of the body. + // But we only want to do this if we are past the `)` of the loop sequence, so we have + // to check that the following token isn't `)` too. + // + // + // ```r + // while (condition) # dangles {} + // { + // } + // ``` + // + // ```r + // while (condition) # leads `a` + // { + // a + // } + // ``` + // + // Particularly important for prepping for autobracing here + // + // ```r + // while (condition) # leads `a` + // a + // ``` + // + // This is consistent to when there are already braces there. + // + // ```r + // while (condition) { # leads `a` + // a + // } + // ``` + // + // Here the following node is the `body`, but the following token is `)`. We want + // to avoid stealing this comment. + // + // ```r + // while (condition # comment + // ) { + // } + // ``` + if let Ok(body) = while_statement.body() { + if let Some(following) = comment.following_node() { + if let Some(following_token) = comment.following_token() { + if body.syntax() == following && following_token.kind() != RSyntaxKind::R_PAREN { + return place_leading_or_dangling_body_comment(body, comment); + } + } } } - // Check that the `body` of the while loop is identical to the `following` - // node of the comment. While loops also have a `condition` that can be - // any R expression, so we need to differentiate here. - let Ok(body) = enclosing.body() else { - return CommentPlacement::Default(comment); - }; - let Some(following) = comment.following_node() else { - return CommentPlacement::Default(comment); - }; - if body.syntax() != following { - return CommentPlacement::Default(comment); - } - - // Handle cases like: + // Otherwise if the comment is "enclosed" by the `while_statement` then it can only be + // in one of a few places, none of which are particularly meaningful, so we move the + // comment to lead the whole `while_statement` // // ```r - // while (a) # comment - // {} + // while # comment + // (condition) { + // } // ``` - place_leading_or_dangling_body_comment(body, comment) + // + // ```r + // while ( # comment + // condition) { + // } + // ``` + // + // ```r + // while ( + // # comment + // condition) { + // } + // ``` + // + // ```r + // while (condition # comment + // ) { + // } + // ``` + CommentPlacement::leading(while_statement.into_syntax(), comment) } fn handle_repeat_comment(comment: DecoratedComment) -> CommentPlacement { - if !matches!( - comment.enclosing_node().kind(), - RSyntaxKind::R_REPEAT_STATEMENT - ) { - return CommentPlacement::Default(comment); - }; - - // Repeat statements have a `repeat` token and a `body` field, and - // only the `body` can be an `AnyRExpression`. - let Some(body) = comment.following_node().and_then(AnyRExpression::cast_ref) else { + let Some(repeat_statement) = RRepeatStatement::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); }; - // Handle cases like: + // If the comment is "enclosed" by the `repeat_statement` then it can only be + // in one of a few places, all of which should move the comment onto the first + // element of the `body` // // ```r - // repeat # comment - // {} + // repeat # dangles {} + // { + // } + // ``` + // + // ```r + // repeat + // # dangles {} + // { + // } // ``` - place_leading_or_dangling_body_comment(body, comment) + // + // ```r + // repeat # leads `a` + // { + // a + // } + // ``` + // + // Particularly important for prepping for autobracing here + // + // ```r + // repeat # leads `a` + // a + // ``` + // + // This is consistent to when there are already braces there. + // + // ```r + // repeat { # leads `a` + // a + // } + // ``` + if let Ok(body) = repeat_statement.body() { + return place_leading_or_dangling_body_comment(body, comment); + } + + // We don't expect to ever fall through to here, but if we do for some unknown reason + // then we make the comment lead the whole `repeat_statement` to be consistent with + // `for_statement` and `while_statement` + CommentPlacement::leading(repeat_statement.into_syntax(), comment) } fn handle_function_comment(comment: DecoratedComment) -> CommentPlacement { - if !matches!( - comment.enclosing_node().kind(), - RSyntaxKind::R_FUNCTION_DEFINITION - ) { + let Some(function_definition) = RFunctionDefinition::cast_ref(comment.enclosing_node()) else { return CommentPlacement::Default(comment); }; - // Function definitions have `name`, `parameters`, and `body` fields, and - // only the `body` can be an `AnyRExpression`. - let Some(body) = comment.following_node().and_then(AnyRExpression::cast_ref) else { - return CommentPlacement::Default(comment); + // If the following node is the `parameters`, we make the comment lead the whole + // `function_definition` node + // + // ```r + // function # becomes leading on everything + // () a + // ``` + // + // ```r + // function + // # becomes leading on everything + // () a + // ``` + if let Ok(parameters) = function_definition.parameters() { + if let Some(following) = comment.following_node() { + if parameters.syntax() == following { + return CommentPlacement::leading(function_definition.into_syntax(), comment); + } + }; }; - place_leading_or_dangling_body_comment(body, comment) + // If the following node is the `body`, we make the comment lead the first node of + // the `body`. + // + // ```r + // function() # becomes leading on `a` + // a + // ``` + // + // ```r + // function() # becomes leading on `a` + // { + // a + // } + // ``` + // + // This is consistent with what happens when the comment is already after an opening + // `{` + // + // ```r + // function() { # becomes leading on `a` + // a + // } + // ``` + if let Ok(body) = function_definition.body() { + if let Some(following) = comment.following_node() { + if body.syntax() == following { + return place_leading_or_dangling_body_comment(body, comment); + } + }; + }; + + CommentPlacement::Default(comment) } fn handle_if_statement_comment( diff --git a/crates/air_r_formatter/src/lib.rs b/crates/air_r_formatter/src/lib.rs index fba98faf..dbef7efd 100644 --- a/crates/air_r_formatter/src/lib.rs +++ b/crates/air_r_formatter/src/lib.rs @@ -24,10 +24,10 @@ mod cst; pub mod either; pub mod formatter_ext; pub mod joiner_ext; +pub mod loop_body; mod prelude; mod r; pub(crate) mod separated; -mod statement_body; mod string_literal; #[rustfmt::skip] diff --git a/crates/air_r_formatter/src/loop_body.rs b/crates/air_r_formatter/src/loop_body.rs new file mode 100644 index 00000000..99919c94 --- /dev/null +++ b/crates/air_r_formatter/src/loop_body.rs @@ -0,0 +1,33 @@ +use crate::prelude::*; +use air_r_syntax::AnyRExpression; +use biome_formatter::format_args; +use biome_formatter::write; + +pub(crate) struct FormatLoopBody<'a> { + node: &'a AnyRExpression, +} + +impl<'a> FormatLoopBody<'a> { + pub fn new(node: &'a AnyRExpression) -> Self { + Self { node } + } +} + +impl Format for FormatLoopBody<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + if matches!(&self.node, AnyRExpression::RBracedExpressions(_)) { + // If it's already braced, just write it + write!(f, [self.node.format()]) + } else { + // Otherwise, brace it and block indent the body + write!( + f, + [ + text("{"), + block_indent(&format_args![&self.node.format()]), + text("}") + ] + ) + } + } +} diff --git a/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs b/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs index d68f9f81..73fad7df 100644 --- a/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs +++ b/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs @@ -123,10 +123,31 @@ impl RCallLikeArguments { /// /// Example: /// + /// An inline function without braces is an example where "most flat" is considered. + /// We consider this as groupable because it is possible that a long function body + /// or long function signature can cause a break, which may benefit from the middle + /// variant before expanding all the way to the most expanded variant. + /// /// ```r - /// # NOTE: Not currently possible, as the `{}` always force a break right now, - /// # but this would be an example if `{}` didn't force a break. - /// map(xs, function(x) {}) + /// # Most flat wins + /// map(xs, function(x) x + 1) + /// + /// # Middle variant wins + /// map(xs, function(x) x + a_really_long_thing_here + a_really_long_thing_there) + /// # -> + /// map(xs, function(x) { + /// x + a_really_long_thing_here + a_really_long_thing_there + /// }) + /// + /// # Most expanded wins (middle variant wouldn't fit within line length) + /// map(xs_are_extremely_long_too, function(this_argument_is_really_long, option = "so is this") this_argument_is_really_long) + /// # -> + /// map( + /// xs_are_extremely_long_too, + /// function(this_argument_is_really_long, option = "so is this") { + /// this_argument_is_really_long + /// } + /// ) /// ``` /// /// This variant is removed from the set if we detect that the grouped argument @@ -134,14 +155,28 @@ impl RCallLikeArguments { /// parameters, we bail entirely and use the most expanded form, as noted /// at the beginning of this documentation page). /// - /// Note that because `{}` currently unconditionally force a break, and because - /// we only go down this path when we have a `{}` to begin with, that means that - /// currently the most flat variant is always removed. There is an - /// `unreachable!()` in the code to assert this. We can't simply remove the - /// `most_flat` code path though, because it is also where we detect if a - /// parameter forces a break, triggering one of our early exists. Additionally, - /// in the future we may allow `{}` to not force a break, meaning this variant - /// may come back into play. + /// ```r + /// # Forced break in the body + /// map(xs, function(x) { + /// x + /// }) + /// + /// # This is a forced break in the body too, and triggers autobracing and + /// # results in the middle variant winning + /// map(xs, function(x) + /// x) + /// ``` + /// + /// Note that because `{}` currently unconditionally force a break, that means that + /// currently the most flat variant is always removed when `{}` are present. In the + /// future we will make empty `{}` never break and we will declare cases of empty + /// `{}` as not groupable, so it won't go through this best fitting process at all. + /// + /// ```r + /// # NOTE: Currently considered groupable, but the `{}` currently unconditionally + /// # force a break, so the `most_flat` variant is always removed + /// map(xs, function(x) {}) + /// ``` /// /// ```r /// # NOTE: We explicitly disallow curly-curly as a groupable argument, @@ -297,7 +332,7 @@ impl RCallLikeArguments { // Write the most flat variant with the first or last argument grouped // (but not forcibly expanded) - let _most_flat = { + let most_flat = { let snapshot = f.state_snapshot(); let mut buffer = VecBuffer::new(f.state_mut()); buffer.write_element(FormatElement::Tag(Tag::StartEntry))?; @@ -375,14 +410,15 @@ impl RCallLikeArguments { buffer.into_vec().into_boxed_slice() }; - // If the grouped content breaks, then we can skip the most_flat variant, - // since we already know that it won't be fitting on a single line. let variants = if grouped_breaks { + // If the grouped content breaks, then we can skip the most_flat variant, + // since we already know that it won't be fitting on a single line. We can + // also go ahead and signal that we will be expanding. write!(f, [expand_parent()])?; vec![middle_variant, most_expanded.into_boxed_slice()] } else { - unreachable!("`grouped_breaks` is currently always `true`."); - // vec![most_flat, middle_variant, most_expanded.into_boxed_slice()] + // Otherwise we best fit between all variants + vec![most_flat, middle_variant, most_expanded.into_boxed_slice()] }; // SAFETY: Safe because variants is guaranteed to contain >=2 entries: @@ -412,7 +448,8 @@ impl Format for RCallLikeArguments { // ) // ``` // - // If we don't handle it specially, it can break idempotence + // If we don't handle it specially, it can break idempotence. + // Same as `RParameters`. if items.is_empty() { return write!( f, @@ -1086,7 +1123,7 @@ fn argument_is_groupable(argument: &AnyRExpression) -> SyntaxResult { // }) // ``` // - // Empty braces always expand, so they benefit from grouping + // Empty braces currently always expand, so they benefit from grouping // // ```r // with(data, { @@ -1097,7 +1134,7 @@ fn argument_is_groupable(argument: &AnyRExpression) -> SyntaxResult { // // ```r // with(data, { - // // comment + // # comment // }) // ``` // @@ -1118,15 +1155,91 @@ fn argument_is_groupable(argument: &AnyRExpression) -> SyntaxResult { // ``` RBracedExpressions(node) => as_curly_curly(node).is_none(), + // Currently, every kind of function definition can benefit from grouping. + // + // - If the body has `{}` and is not empty, the braces will be expanded across + // multiple lines, so it can benefit from grouping. + // + // - If the body does not have `{}`, we may keep the function definition + // completely flat on one line, or we may expand it over multiple lines if + // it exceeds the line length or a persistent line break forces a break. If + // it were to break, it would benefit from grouping. + // + // - NOTE: If the body has `{}` and is empty (including no comments), then + // currently those are always expanded anyways, so it can benefit from grouping. + // In the future we will force empty `{}` to not expand, and when that happens + // we should return `false` from here to signal that it can't benefit from + // grouping (use the same rule as `RBracedExpressions` above, but applied to + // the `node.body()`). + // + // Here are some examples: + // + // Trailing function definition with braces is the classic example of something + // that benefits from grouping + // // ```r - // map(a, function(x) { + // map(xs, function(x) { // x // }) // ``` - RFunctionDefinition(node) => { - let body = node.body()?; - matches!(&body, AnyRExpression::RBracedExpressions(_)) - } + // + // When the braces are empty, it still benefits from grouping right now because + // we currently always expand the braces. + // + // ```r + // map(xs, function(x) { + // }) + // ``` + // + // With a dangling comment in empty braces, we always benefit from grouping + // + // ```r + // map(xs, function(x) { + // # comment + // }) + // ``` + // + // Long function definition that would be split over multiple lines, triggering + // autobracing, and would benefit from grouping + // + // ```r + // map(xs, function(x) x + a_really_really_long_name + another_really_long_name) + // + // # Becomes: + // map(xs, function(x) { + // x + a_really_really_long_name + another_really_long_name + // }) + // + // # Which is preferred over fully expanding: + // map( + // xs, + // function(x) { + // x + a_really_really_long_name + another_really_long_name + // } + // ) + // ``` + // + // Line break in the function definition body, triggering autobracing, and would + // benefit from grouping + // + // ```r + // map(xs, function(x) + // x + 1) + // + // # Becomes: + // map(xs, function(x) { + // x + 1 + // }) + // + // # Which is preferred over fully expanding: + // map( + // xs, + // function(x) { + // x + 1 + // } + // ) + // ``` + RFunctionDefinition(_) => true, // Nothing else benefits from grouping _ => false, diff --git a/crates/air_r_formatter/src/r/auxiliary/for_statement.rs b/crates/air_r_formatter/src/r/auxiliary/for_statement.rs index 0eb61087..f0c45d04 100644 --- a/crates/air_r_formatter/src/r/auxiliary/for_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/for_statement.rs @@ -1,5 +1,5 @@ +use crate::loop_body::FormatLoopBody; use crate::prelude::*; -use crate::statement_body::FormatStatementBody; use air_r_syntax::RForStatement; use air_r_syntax::RForStatementFields; use biome_formatter::format_args; @@ -31,7 +31,8 @@ impl FormatNodeRule for FormatRForStatement { space(), sequence.format(), r_paren_token.format(), - FormatStatementBody::new(&body?) + space(), + FormatLoopBody::new(&body?) ))] ) } diff --git a/crates/air_r_formatter/src/r/auxiliary/function_definition.rs b/crates/air_r_formatter/src/r/auxiliary/function_definition.rs index cb052f97..e95e1cd2 100644 --- a/crates/air_r_formatter/src/r/auxiliary/function_definition.rs +++ b/crates/air_r_formatter/src/r/auxiliary/function_definition.rs @@ -1,9 +1,11 @@ use crate::prelude::*; -use crate::statement_body::FormatStatementBody; +use air_r_syntax::AnyRExpression; use air_r_syntax::RFunctionDefinition; +use biome_formatter::format_args; use biome_formatter::write; use biome_formatter::FormatRuleWithOptions; use biome_formatter::RemoveSoftLinesBuffer; +use biome_rowan::SyntaxResult; use super::call_arguments::GroupedCallArgumentLayout; @@ -82,13 +84,76 @@ impl FormatFunction { Ok(()) }); + // The `group()` should contain all elements of the function definition, + // which allows `FormatFunctionBody` to autobrace if any element forces a + // break write!( f, - [ + [group(&format_args!( name.format(), &format_parameters, - group(&FormatStatementBody::new(&body)) - ] + space(), + FormatFunctionBody::new(&body) + ))] ) } } + +pub(crate) struct FormatFunctionBody<'a> { + node: &'a AnyRExpression, +} + +impl<'a> FormatFunctionBody<'a> { + pub fn new(node: &'a AnyRExpression) -> Self { + Self { node } + } +} + +impl Format for FormatFunctionBody<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + match self.node { + // Body already has braces, just format it + AnyRExpression::RBracedExpressions(node) => { + write!(f, [node.format()]) + } + // Body does not have braces yet + node => { + if should_force_braced_expressions(node)? { + write!( + f, + [ + text("{"), + block_indent(&format_args![&node.format()]), + text("}") + ] + ) + } else { + write!( + f, + [ + if_group_breaks(&text("{")), + soft_block_indent(&format_args![&node.format()]), + if_group_breaks(&text("}")) + ] + ) + } + } + } + } +} + +fn should_force_braced_expressions(node: &AnyRExpression) -> SyntaxResult { + // A kind of persistent line break, but not one we respect + // with the `persistent_line_breaks` setting, because it is + // more related to whether or not we autobrace + // + // ```r + // function() + // a + // ``` + if node.syntax().has_leading_newline() { + return Ok(true); + } + + Ok(false) +} diff --git a/crates/air_r_formatter/src/r/auxiliary/parameters.rs b/crates/air_r_formatter/src/r/auxiliary/parameters.rs index fb2d6cf3..c9065ee2 100644 --- a/crates/air_r_formatter/src/r/auxiliary/parameters.rs +++ b/crates/air_r_formatter/src/r/auxiliary/parameters.rs @@ -16,6 +16,27 @@ impl FormatNodeRule for FormatRParameters { r_paren_token, } = node.as_fields(); + // Special case where the dangling comment has no node to attach to: + // + // ```r + // function( + // # dangling comment + // ) {} + // ``` + // + // If we don't handle it specially, it can break idempotence. + // Same as `RCallLikeArguments`. + if items.is_empty() { + return write!( + f, + [ + l_paren_token.format(), + format_dangling_comments(node.syntax()).with_soft_block_indent(), + r_paren_token.format() + ] + ); + } + write!( f, [group(&format_args![ @@ -26,6 +47,12 @@ impl FormatNodeRule for FormatRParameters { .should_expand(has_persistent_line_break(&items, f.options()))] ) } + + fn fmt_dangling_comments(&self, _: &RParameters, _: &mut RFormatter) -> FormatResult<()> { + // Formatted inside of `fmt_fields` + // Only applicable for the empty arguments case + Ok(()) + } } /// Check if the user has inserted a persistent line break before the very first `parameter`. diff --git a/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs b/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs index 188a4149..9522380c 100644 --- a/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/repeat_statement.rs @@ -1,5 +1,5 @@ +use crate::loop_body::FormatLoopBody; use crate::prelude::*; -use crate::statement_body::FormatStatementBody; use air_r_syntax::RRepeatStatement; use air_r_syntax::RRepeatStatementFields; use biome_formatter::format_args; @@ -15,7 +15,8 @@ impl FormatNodeRule for FormatRRepeatStatement { f, [group(&format_args!( repeat_token.format(), - FormatStatementBody::new(&body?) + space(), + FormatLoopBody::new(&body?) ))] ) } diff --git a/crates/air_r_formatter/src/r/auxiliary/while_statement.rs b/crates/air_r_formatter/src/r/auxiliary/while_statement.rs index 8b200808..b7060114 100644 --- a/crates/air_r_formatter/src/r/auxiliary/while_statement.rs +++ b/crates/air_r_formatter/src/r/auxiliary/while_statement.rs @@ -1,5 +1,5 @@ +use crate::loop_body::FormatLoopBody; use crate::prelude::*; -use crate::statement_body::FormatStatementBody; use air_r_syntax::RWhileStatement; use air_r_syntax::RWhileStatementFields; use biome_formatter::format_args; @@ -25,7 +25,8 @@ impl FormatNodeRule for FormatRWhileStatement { l_paren_token.format(), group(&soft_block_indent(&condition.format())), r_paren_token.format(), - FormatStatementBody::new(&body?) + space(), + FormatLoopBody::new(&body?) ))] ) } diff --git a/crates/air_r_formatter/src/statement_body.rs b/crates/air_r_formatter/src/statement_body.rs deleted file mode 100644 index db4dd00c..00000000 --- a/crates/air_r_formatter/src/statement_body.rs +++ /dev/null @@ -1,46 +0,0 @@ -use crate::prelude::*; -use air_r_syntax::AnyRExpression; -use biome_formatter::write; - -pub(crate) struct FormatStatementBody<'a> { - body: &'a AnyRExpression, -} - -impl<'a> FormatStatementBody<'a> { - pub fn new(body: &'a AnyRExpression) -> Self { - Self { body } - } -} - -// TODO!: Repurpose this as `FormatBracedBody` for use in: -// - For loops (unconditionally) -// - Repeat loops (unconditionally) -// - While loops (unconditionally) -// - Function definitions -// - Includes anonymous functions -// - Allow 1 liner function definitions -// - Definitely breaks if argument list expands over multiple lines -// - Use `if_group_breaks(&text("{"))` to add missing `{}` if the group breaks, -// like if statements. Probably won't be able to use simple `FormatBracedBody` -impl Format for FormatStatementBody<'_> { - fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - use AnyRExpression::*; - - // We only want a single space between the construct and the statement - // body if the body is a braced expression. The expression will handle - // the line break as required. Otherwise, we handle the soft line - // indent or space for all other syntax. - // - // for (x in xs) {} - // |--| statement body - // - // for (x in xs) - // a - // |--| statement body - if matches!(&self.body, RBracedExpressions(_)) { - write!(f, [space(), self.body.format()]) - } else { - write!(f, [soft_line_indent_or_space(&self.body.format())]) - } - } -} diff --git a/crates/air_r_formatter/tests/specs/r/call.R b/crates/air_r_formatter/tests/specs/r/call.R index 8b215a43..1bc70d56 100644 --- a/crates/air_r_formatter/tests/specs/r/call.R +++ b/crates/air_r_formatter/tests/specs/r/call.R @@ -340,23 +340,78 @@ map(xs, function(x) { # Braces expand over multiple lines map(xs, function(x) { }) -# This should stay where it is +# Best fitting is used to choose the most flat variant, and this stays +# as is map(xs, function(x) x) -# This form is too wide, so it fully expands -map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { +# Best fitting is used to choose the most expanded variant over +# the current middle variant, because the middle variant exceeds +# the line length +map(my_long_list_my_long_list_my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { my_long_body_my_long_body_my_long_body_my_long_body_my_long_body }) -# Parameter names are very long, so it fully expands -# (Note that this uses best-fitting. The `parameters` themselves don't force a -# break, but when a best-fit choice is made between this form with no -# soft-indents allowed in the `parameters` and the fully expanded form, the -# fully expanded form wins) +# Best fitting is used to choose the most expanded variant over +# the current middle variant. Remember no soft-indents are allowed in the +# `parameters` when looking at options for the middle variant, so the middle +# variant can't choose to put each parameter on its own line in the current +# form. Only the most expanded form can do that when fully breaking everything. map(x, function(a, a_really_really_long_parameter, and_another_one_here_too_wow_this_is_long) { 1 }) +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES NOT kick in as it expands in this case. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_argument) + +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES kick in as it expands in this case as it fully +# expands the function definition too to keep it within the line length. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_list_my_long_list_my_long_list_my_long_list_my_long_list) + +# Best fitting is used to choose the middle variant over the current most flat +# form. The middle variant fits within the line length, so is preferred over +# the most expanded form. +map(xs, function(my_long_argument) my_long_argument + my_extra_long_extra_argument) + +# Best fitting is used to choose the middle variant. The line break forces +# autobracing, which means the function definition breaks. That rules out +# the most flat variant, and then the middle variant is preferred over most +# expanded because it fits in the line length. +map(xs, function(x, option = "a") + x +) + +# Best fitting is attempted, but we bail early because the persistent line +# break causes the function definition `parameters` to expand, and if that +# happens we fully expand +map(xs, function( + x, option = "a") { + x +}) + +# Best fitting is not used here. The `xs` already has a line break, so +# we stay fully expanded. The function parameters are flattened though. +map( + xs, + function(x, + option = "a" + ) { + x + } +) + +# Best fitting is used to choose the middle variant. This is not a persistent +# line break location, so the function `parameters` don't actually break here. +# And then we choose middle over most expanded because it fits within the line +# length. +map(xs, function(x, + option = "a") { + x +}) + # The `{ 1 }` parameter would force a hard line break. We detect this and don't # use best-fitting. Instead we fall back to the most expanded form. map(x, function(a = { 1 }) { diff --git a/crates/air_r_formatter/tests/specs/r/call.R.snap b/crates/air_r_formatter/tests/specs/r/call.R.snap index af32020a..6238ff37 100644 --- a/crates/air_r_formatter/tests/specs/r/call.R.snap +++ b/crates/air_r_formatter/tests/specs/r/call.R.snap @@ -347,23 +347,78 @@ map(xs, function(x) { # Braces expand over multiple lines map(xs, function(x) { }) -# This should stay where it is +# Best fitting is used to choose the most flat variant, and this stays +# as is map(xs, function(x) x) -# This form is too wide, so it fully expands -map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { +# Best fitting is used to choose the most expanded variant over +# the current middle variant, because the middle variant exceeds +# the line length +map(my_long_list_my_long_list_my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { my_long_body_my_long_body_my_long_body_my_long_body_my_long_body }) -# Parameter names are very long, so it fully expands -# (Note that this uses best-fitting. The `parameters` themselves don't force a -# break, but when a best-fit choice is made between this form with no -# soft-indents allowed in the `parameters` and the fully expanded form, the -# fully expanded form wins) +# Best fitting is used to choose the most expanded variant over +# the current middle variant. Remember no soft-indents are allowed in the +# `parameters` when looking at options for the middle variant, so the middle +# variant can't choose to put each parameter on its own line in the current +# form. Only the most expanded form can do that when fully breaking everything. map(x, function(a, a_really_really_long_parameter, and_another_one_here_too_wow_this_is_long) { 1 }) +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES NOT kick in as it expands in this case. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_argument) + +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES kick in as it expands in this case as it fully +# expands the function definition too to keep it within the line length. +map(my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) my_long_list_my_long_list_my_long_list_my_long_list_my_long_list) + +# Best fitting is used to choose the middle variant over the current most flat +# form. The middle variant fits within the line length, so is preferred over +# the most expanded form. +map(xs, function(my_long_argument) my_long_argument + my_extra_long_extra_argument) + +# Best fitting is used to choose the middle variant. The line break forces +# autobracing, which means the function definition breaks. That rules out +# the most flat variant, and then the middle variant is preferred over most +# expanded because it fits in the line length. +map(xs, function(x, option = "a") + x +) + +# Best fitting is attempted, but we bail early because the persistent line +# break causes the function definition `parameters` to expand, and if that +# happens we fully expand +map(xs, function( + x, option = "a") { + x +}) + +# Best fitting is not used here. The `xs` already has a line break, so +# we stay fully expanded. The function parameters are flattened though. +map( + xs, + function(x, + option = "a" + ) { + x + } +) + +# Best fitting is used to choose the middle variant. This is not a persistent +# line break location, so the function `parameters` don't actually break here. +# And then we choose middle over most expanded because it fits within the line +# length. +map(xs, function(x, + option = "a") { + x +}) + # The `{ 1 }` parameter would force a hard line break. We detect this and don't # use best-fitting. Instead we fall back to the most expanded form. map(x, function(a = { 1 }) { @@ -1187,22 +1242,25 @@ map(xs, function(x) { map(xs, function(x) { }) -# This should stay where it is +# Best fitting is used to choose the most flat variant, and this stays +# as is map(xs, function(x) x) -# This form is too wide, so it fully expands +# Best fitting is used to choose the most expanded variant over +# the current middle variant, because the middle variant exceeds +# the line length map( - my_long_list_my_long_list_my_long_list_my_long_list, + my_long_list_my_long_list_my_long_list_my_long_list_my_long_list_my_long_list, function(my_long_argument) { my_long_body_my_long_body_my_long_body_my_long_body_my_long_body } ) -# Parameter names are very long, so it fully expands -# (Note that this uses best-fitting. The `parameters` themselves don't force a -# break, but when a best-fit choice is made between this form with no -# soft-indents allowed in the `parameters` and the fully expanded form, the -# fully expanded form wins) +# Best fitting is used to choose the most expanded variant over +# the current middle variant. Remember no soft-indents are allowed in the +# `parameters` when looking at options for the middle variant, so the middle +# variant can't choose to put each parameter on its own line in the current +# form. Only the most expanded form can do that when fully breaking everything. map( x, function( @@ -1214,6 +1272,70 @@ map( } ) +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES NOT kick in as it expands in this case. +map( + my_long_list_my_long_list_my_long_list_my_long_list, + function(my_long_argument) my_long_argument +) + +# Best fitting is used to choose most expanded here. Even with the middle +# variant, the first map() argument and the function signature would exceed the +# line length. Autobracing DOES kick in as it expands in this case as it fully +# expands the function definition too to keep it within the line length. +map( + my_long_list_my_long_list_my_long_list_my_long_list, + function(my_long_argument) { + my_long_list_my_long_list_my_long_list_my_long_list_my_long_list + } +) + +# Best fitting is used to choose the middle variant over the current most flat +# form. The middle variant fits within the line length, so is preferred over +# the most expanded form. +map(xs, function(my_long_argument) { + my_long_argument + my_extra_long_extra_argument +}) + +# Best fitting is used to choose the middle variant. The line break forces +# autobracing, which means the function definition breaks. That rules out +# the most flat variant, and then the middle variant is preferred over most +# expanded because it fits in the line length. +map(xs, function(x, option = "a") { + x +}) + +# Best fitting is attempted, but we bail early because the persistent line +# break causes the function definition `parameters` to expand, and if that +# happens we fully expand +map( + xs, + function( + x, + option = "a" + ) { + x + } +) + +# Best fitting is not used here. The `xs` already has a line break, so +# we stay fully expanded. The function parameters are flattened though. +map( + xs, + function(x, option = "a") { + x + } +) + +# Best fitting is used to choose the middle variant. This is not a persistent +# line break location, so the function `parameters` don't actually break here. +# And then we choose middle over most expanded because it fits within the line +# length. +map(xs, function(x, option = "a") { + x +}) + # The `{ 1 }` parameter would force a hard line break. We detect this and don't # use best-fitting. Instead we fall back to the most expanded form. map( @@ -1689,8 +1811,8 @@ foo(bar[ # Lines exceeding max width of 80 characters ``` 307: my_long_list_my_long_list_my_long_list_my_long_list_long_long_long_long_long_list, - 701: foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz - 705: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz())) - 708: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( - 721: name = foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( + 768: foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz + 772: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz())) + 775: c(list(foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( + 788: name = foobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbafoobarbazzzzzzzzzfoobarbaz( ``` diff --git a/crates/air_r_formatter/tests/specs/r/for_statement.R b/crates/air_r_formatter/tests/specs/r/for_statement.R index bdd2356c..0eb90bdb 100644 --- a/crates/air_r_formatter/tests/specs/r/for_statement.R +++ b/crates/air_r_formatter/tests/specs/r/for_statement.R @@ -1,11 +1,75 @@ -for(x in y) x + y +for(x in xs) { + x + 1 +} -for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) 1 +for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { + a_really_long_argument_name +} -# own-line comments get lifted up +# ------------------------------------------------------------------------------ +# Autobracing + +for(x in xs) {} + +# Unconditional autobracing on for loop bodies to maximize clarity and intent +for(x in xs) x +for(x in xs) x + y + +# ------------------------------------------------------------------------------ +# Comments + +for # leads for loop +(i in 1:5) { +} + +for ( # leads for loop +i in 1:5) { +} + +for (i # leads for loop +in 1:5) { +} + +for (i in # leads for loop +1:5) { +} + +for (i in +# leads for loop +1:5) { +} + +for (i in 1:5 # leads for loop +) { +} + +for (i in 1:5) # dangles {} + { + } + +for (i in 1:5) # leads a +{ + a +} + +for (i in 1:5) { # leads a + a +} + +for (i in 1:5) i # trails whole for loop + +for (i in 1:5) { i } # trails whole for loop + +# Comments 1-3 lead the whole for loop +# Comments 4-5 move to lead the body for ( # comment1 # comment2 a in 1 - ) # comment3 - a + # comment3 + ) # comment4 + # comment5 + { + # comment6 + a + } \ No newline at end of file diff --git a/crates/air_r_formatter/tests/specs/r/for_statement.R.snap b/crates/air_r_formatter/tests/specs/r/for_statement.R.snap index 570e2f39..fc3a86bb 100644 --- a/crates/air_r_formatter/tests/specs/r/for_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/for_statement.R.snap @@ -5,18 +5,81 @@ info: r/for_statement.R # Input ```R -for(x in y) x + y +for(x in xs) { + x + 1 +} -for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) 1 +for(a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { + a_really_long_argument_name +} -# own-line comments get lifted up +# ------------------------------------------------------------------------------ +# Autobracing + +for(x in xs) {} + +# Unconditional autobracing on for loop bodies to maximize clarity and intent +for(x in xs) x +for(x in xs) x + y + +# ------------------------------------------------------------------------------ +# Comments + +for # leads for loop +(i in 1:5) { +} + +for ( # leads for loop +i in 1:5) { +} + +for (i # leads for loop +in 1:5) { +} + +for (i in # leads for loop +1:5) { +} + +for (i in +# leads for loop +1:5) { +} + +for (i in 1:5 # leads for loop +) { +} + +for (i in 1:5) # dangles {} + { + } + +for (i in 1:5) # leads a +{ + a +} + +for (i in 1:5) { # leads a + a +} + +for (i in 1:5) i # trails whole for loop + +for (i in 1:5) { i } # trails whole for loop + +# Comments 1-3 lead the whole for loop +# Comments 4-5 move to lead the body for ( # comment1 # comment2 a in 1 - ) # comment3 - a - + # comment3 + ) # comment4 + # comment5 + { + # comment6 + a + } ``` @@ -36,19 +99,91 @@ Skip: None ----- ```R -for (x in y) x + y +for (x in xs) { + x + 1 +} + +for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { + a_really_long_argument_name +} + +# ------------------------------------------------------------------------------ +# Autobracing + +for (x in xs) { +} + +# Unconditional autobracing on for loop bodies to maximize clarity and intent +for (x in xs) { + x +} +for (x in xs) { + x + y +} + +# ------------------------------------------------------------------------------ +# Comments + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +# leads for loop +for (i in 1:5) { +} + +for (i in 1:5) { + # dangles {} +} + +for (i in 1:5) { + # leads a + a +} + +for (i in 1:5) { + # leads a + a +} + +for (i in 1:5) { + i +} # trails whole for loop -for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) - 1 +for (i in 1:5) { + i +} # trails whole for loop -# own-line comments get lifted up +# Comments 1-3 lead the whole for loop +# Comments 4-5 move to lead the body # comment1 # comment2 -for (a in 1) # comment3 +# comment3 +for (a in 1) { + # comment4 + # comment5 + # comment6 a +} ``` # Lines exceeding max width of 80 characters ``` - 3: for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) + 5: for (a_really_long_argument_name in but_we_dont_ever_break_inside_for_conditions_no_matter_how_long) { ``` diff --git a/crates/air_r_formatter/tests/specs/r/function_definition.R b/crates/air_r_formatter/tests/specs/r/function_definition.R index 20063b80..7a796f1c 100644 --- a/crates/air_r_formatter/tests/specs/r/function_definition.R +++ b/crates/air_r_formatter/tests/specs/r/function_definition.R @@ -1,44 +1,129 @@ +# ------------------------------------------------------------------------ +# Miscellaneous + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) { + a +} + +function(a = { + 1 +}, b) { + 1 +} + +# ------------------------------------------------------------------------ +# Autobracing + +# These stay flat function() 1 function(a, b) 1 +\() 1 +\(a, b) 1 -function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) 1 +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b) 1 -function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on +\( + a, b) 1 -function(a = { +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b +) 1 + +# Persistent line break in `body` triggers autobracing +function(a, b) 1 -}, b) { + +\(a, b) 1 + +# This snaps back to one line +function +(a, b) 1 + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) a + +function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on + +# ------------------------------------------------------------------------ +# Comments + +function # leads function +() { +} + +function +# leads function +() { +} + +function( # dangles () +) { +} + +function( + # dangles () +) { } function() { - # comment + # dangles {} } -function() # becomes leading on `1 + 1` +function() a # trails function + +function() # leads `a` { - 1 + 1 + a } -function() # becomes leading on `1 + 1` +function() # leads `a` { # an inner comment - 1 + 1 + a } -function() # becomes dangling on the `{}` +function() # dangles {} { } -function() # becomes dangling on the `{}` +function() # dangles {} { # an inner comment but empty `{}` } -function() # becomes leading on `1 + 1` - 1 + 1 +function() # leads `a` + a + +# Not much we can do here, it's not enclosed by the `function_definition` node +# so it ends up trailing the `}` of the function. This is consistent with +# non-enclosed comments in if/else and loops. +function() + a # trails function -\(x, y) 1 +function( + # leads `a` + a +) { + # comment +} + +function( + a # trails `a` +) { + # comment +} + +function( + a + # trails `a` +) { + # comment +} # ------------------------------------------------------------------------ # User requested line break @@ -94,31 +179,3 @@ fn <- function(a, b = c( 1, 2, 3)) { body } - -# ------------------------------------------------------------------------ -# User requested line break and trailing anonymous functions in calls - -# Ensure these features play nicely together - -# This user line break expands the function definition, causing the whole -# `map()` to expand -map(xs, function( - x, option = "a") { - x -}) - -# This flattens the function definition, but the `map()` stays expanded -map( - xs, - function(x, - option = "a" - ) { - x - } -) - -# This flattens to one line -map(xs, function(x, - option = "a") { - x -}) diff --git a/crates/air_r_formatter/tests/specs/r/function_definition.R.snap b/crates/air_r_formatter/tests/specs/r/function_definition.R.snap index 526ab59a..3d9df5c8 100644 --- a/crates/air_r_formatter/tests/specs/r/function_definition.R.snap +++ b/crates/air_r_formatter/tests/specs/r/function_definition.R.snap @@ -5,47 +5,132 @@ info: r/function_definition.R # Input ```R +# ------------------------------------------------------------------------ +# Miscellaneous + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) { + a +} + +function(a = { + 1 +}, b) { + 1 +} + +# ------------------------------------------------------------------------ +# Autobracing + +# These stay flat function() 1 function(a, b) 1 +\() 1 +\(a, b) 1 -function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) 1 +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b) 1 -function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on +\( + a, b) 1 -function(a = { +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, b +) 1 + +# Persistent line break in `body` triggers autobracing +function(a, b) 1 -}, b) { + +\(a, b) 1 + +# This snaps back to one line +function +(a, b) 1 + +function(a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this) a + +function(a_really_long_argument_name_to_break_on, and_this) a_really_long_argument_name_to_break_on + +# ------------------------------------------------------------------------ +# Comments + +function # leads function +() { +} + +function +# leads function +() { +} + +function( # dangles () +) { +} + +function( + # dangles () +) { } function() { - # comment + # dangles {} } -function() # becomes leading on `1 + 1` +function() a # trails function + +function() # leads `a` { - 1 + 1 + a } -function() # becomes leading on `1 + 1` +function() # leads `a` { # an inner comment - 1 + 1 + a } -function() # becomes dangling on the `{}` +function() # dangles {} { } -function() # becomes dangling on the `{}` +function() # dangles {} { # an inner comment but empty `{}` } -function() # becomes leading on `1 + 1` - 1 + 1 +function() # leads `a` + a + +# Not much we can do here, it's not enclosed by the `function_definition` node +# so it ends up trailing the `}` of the function. This is consistent with +# non-enclosed comments in if/else and loops. +function() + a # trails function -\(x, y) 1 +function( + # leads `a` + a +) { + # comment +} + +function( + a # trails `a` +) { + # comment +} + +function( + a + # trails `a` +) { + # comment +} # ------------------------------------------------------------------------ # User requested line break @@ -102,34 +187,6 @@ fn <- function(a, b = c( body } -# ------------------------------------------------------------------------ -# User requested line break and trailing anonymous functions in calls - -# Ensure these features play nicely together - -# This user line break expands the function definition, causing the whole -# `map()` to expand -map(xs, function( - x, option = "a") { - x -}) - -# This flattens the function definition, but the `map()` stays expanded -map( - xs, - function(x, - option = "a" - ) { - x - } -) - -# This flattens to one line -map(xs, function(x, - option = "a") { - x -}) - ``` @@ -149,17 +206,16 @@ Skip: None ----- ```R -function() 1 -function(a, b) 1 +# ------------------------------------------------------------------------ +# Miscellaneous function( a_really_long_argument_name_to_break_on, and_here_is_another_one_please_break_me, and_this -) 1 - -function(a_really_long_argument_name_to_break_on, and_this) - a_really_long_argument_name_to_break_on +) { + a +} function( a = { @@ -170,35 +226,142 @@ function( 1 } +# ------------------------------------------------------------------------ +# Autobracing + +# These stay flat +function() 1 +function(a, b) 1 +\() 1 +\(a, b) 1 + +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, + b +) { + 1 +} + +\( + a, + b +) { + 1 +} + +# Persistent line break in `parameters` triggers autobracing because the `body` +# is in the same `group()` +function( + a, + b +) { + 1 +} + +# Persistent line break in `body` triggers autobracing +function(a, b) { + 1 +} + +\(a, b) { + 1 +} + +# This snaps back to one line +function(a, b) 1 + +function( + a_really_long_argument_name_to_break_on, + and_here_is_another_one_please_break_me, + and_this +) { + a +} + +function(a_really_long_argument_name_to_break_on, and_this) { + a_really_long_argument_name_to_break_on +} + +# ------------------------------------------------------------------------ +# Comments + +# leads function function() { - # comment } +# leads function function() { - # becomes leading on `1 + 1` - 1 + 1 +} + +function( + # dangles () +) { +} + +function( + # dangles () +) { } function() { - # becomes leading on `1 + 1` + # dangles {} +} + +function() a # trails function + +function() { + # leads `a` + a +} + +function() { + # leads `a` # an inner comment - 1 + 1 + a } function() { - # becomes dangling on the `{}` + # dangles {} } function() { - # becomes dangling on the `{}` + # dangles {} # an inner comment but empty `{}` } -function() - # becomes leading on `1 + 1` - 1 + 1 +function() { + # leads `a` + a +} -\(x, y) 1 +# Not much we can do here, it's not enclosed by the `function_definition` node +# so it ends up trailing the `}` of the function. This is consistent with +# non-enclosed comments in if/else and loops. +function() { + a +} # trails function + +function( + # leads `a` + a +) { + # comment +} + +function( + a # trails `a` +) { + # comment +} + +function( + a + # trails `a` +) { + # comment +} # ------------------------------------------------------------------------ # User requested line break @@ -257,34 +420,4 @@ fn <- function( ) { body } - -# ------------------------------------------------------------------------ -# User requested line break and trailing anonymous functions in calls - -# Ensure these features play nicely together - -# This user line break expands the function definition, causing the whole -# `map()` to expand -map( - xs, - function( - x, - option = "a" - ) { - x - } -) - -# This flattens the function definition, but the `map()` stays expanded -map( - xs, - function(x, option = "a") { - x - } -) - -# This flattens to one line -map(xs, function(x, option = "a") { - x -}) ``` diff --git a/crates/air_r_formatter/tests/specs/r/repeat_statement.R b/crates/air_r_formatter/tests/specs/r/repeat_statement.R index c36e98db..d1d0e8cf 100644 --- a/crates/air_r_formatter/tests/specs/r/repeat_statement.R +++ b/crates/air_r_formatter/tests/specs/r/repeat_statement.R @@ -1,41 +1,59 @@ -repeat 1 - repeat {} -repeat { # a comment +repeat { + a + b +} + +repeat +{ + a + b } -repeat { # comment1 - # comment2 +# ------------------------------------------------------------------------------ +# Autobracing + +repeat 1 + +repeat 1 + 1 + +# ------------------------------------------------------------------------------ +# Comments + +repeat { # dangles {} } -repeat # comment1 +# These should be consistent +repeat { # leads a + # leads a 2 + a +} +repeat # leads a { - # comment2 - 1 + 1 + # leads a 2 + a } -repeat # comment1 +repeat # dangles {} {} -repeat # comment1 +repeat # dangles {} { - # comment2 + # dangles {} 2 } repeat -# comment1 +# leads a { - 1 + 1 + a } -# comment1 +# leads repeat repeat { - # comment2 - 1 + 1 + # leads a + a } -repeat # comment1 - 1 +repeat # leads a + a diff --git a/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap b/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap index 672a943d..4bf4e65a 100644 --- a/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/repeat_statement.R.snap @@ -5,47 +5,65 @@ info: r/repeat_statement.R # Input ```R -repeat 1 - repeat {} -repeat { # a comment +repeat { + a + b +} + +repeat +{ + a + b } -repeat { # comment1 - # comment2 +# ------------------------------------------------------------------------------ +# Autobracing + +repeat 1 + +repeat 1 + 1 + +# ------------------------------------------------------------------------------ +# Comments + +repeat { # dangles {} } -repeat # comment1 +# These should be consistent +repeat { # leads a + # leads a 2 + a +} +repeat # leads a { - # comment2 - 1 + 1 + # leads a 2 + a } -repeat # comment1 +repeat # dangles {} {} -repeat # comment1 +repeat # dangles {} { - # comment2 + # dangles {} 2 } repeat -# comment1 +# leads a { - 1 + 1 + a } -# comment1 +# leads repeat repeat { - # comment2 - 1 + 1 + # leads a + a } -repeat # comment1 - 1 +repeat # leads a + a ``` @@ -66,48 +84,69 @@ Skip: None ----- ```R -repeat 1 +repeat { +} repeat { + a + b } repeat { - # a comment + a + b } +# ------------------------------------------------------------------------------ +# Autobracing + repeat { - # comment1 - # comment2 - 1 + 1 + 1 } repeat { - # comment1 - # comment2 1 + 1 } +# ------------------------------------------------------------------------------ +# Comments + repeat { - # comment1 + # dangles {} } +# These should be consistent repeat { - # comment1 - # comment2 + # leads a + # leads a 2 + a +} +repeat { + # leads a + # leads a 2 + a } repeat { - # comment1 - 1 + 1 + # dangles {} } -# comment1 repeat { - # comment2 - 1 + 1 + # dangles {} + # dangles {} 2 } -repeat - # comment1 - 1 +repeat { + # leads a + a +} + +# leads repeat +repeat { + # leads a + a +} + +repeat { + # leads a + a +} ``` diff --git a/crates/air_r_formatter/tests/specs/r/while_statement.R b/crates/air_r_formatter/tests/specs/r/while_statement.R index 355c906d..91fab1c5 100644 --- a/crates/air_r_formatter/tests/specs/r/while_statement.R +++ b/crates/air_r_formatter/tests/specs/r/while_statement.R @@ -1,13 +1,9 @@ -while(a)a - while(a){} while(a) { 1 + 1 } -# TODO: Not entirely sure how this should be formatted. -# It's not very common though. while({ complex }) { 1 + 1 } @@ -16,30 +12,75 @@ while(super_long_function_name_is_true_man_this_is_a_really_really_long_function 1 + 1 } -while( - # comment - a) { - 1 + 1 +# ------------------------------------------------------------------------------ +# Autobracing + +while(a)a + +while(a) + a + +# ------------------------------------------------------------------------------ +# Comments + +while # leads while +(a) { + b +} + +while +# leads while +(a) { + b +} + +while(a # leads while +) { + b } while(a -# comment +# leads while ) { - 1 + 1 + b } -while(a # comment +while( # leads while + a) { + b +} + +while( + # leads while + a) { + b +} + +while( + a + # leads while ) { - 1 + 1 + b +} + +while(a) # leads b +{ + b +} + +while(a) +# leads b +{ + b } -while(a) # comment +while(a) # dangles {} {} -while(a) # comment -1 +while(a) # leads b +b -while(a) # comment1 +while(a) # dangles {} { - # comment2 + # dangles {} 2 } diff --git a/crates/air_r_formatter/tests/specs/r/while_statement.R.snap b/crates/air_r_formatter/tests/specs/r/while_statement.R.snap index 335f4a8f..4f6c0dd3 100644 --- a/crates/air_r_formatter/tests/specs/r/while_statement.R.snap +++ b/crates/air_r_formatter/tests/specs/r/while_statement.R.snap @@ -5,16 +5,12 @@ info: r/while_statement.R # Input ```R -while(a)a - while(a){} while(a) { 1 + 1 } -# TODO: Not entirely sure how this should be formatted. -# It's not very common though. while({ complex }) { 1 + 1 } @@ -23,32 +19,77 @@ while(super_long_function_name_is_true_man_this_is_a_really_really_long_function 1 + 1 } -while( - # comment - a) { - 1 + 1 +# ------------------------------------------------------------------------------ +# Autobracing + +while(a)a + +while(a) + a + +# ------------------------------------------------------------------------------ +# Comments + +while # leads while +(a) { + b +} + +while +# leads while +(a) { + b +} + +while(a # leads while +) { + b } while(a -# comment +# leads while ) { - 1 + 1 + b } -while(a # comment +while( # leads while + a) { + b +} + +while( + # leads while + a) { + b +} + +while( + a + # leads while ) { - 1 + 1 + b +} + +while(a) # leads b +{ + b +} + +while(a) +# leads b +{ + b } -while(a) # comment +while(a) # dangles {} {} -while(a) # comment -1 +while(a) # leads b +b -while(a) # comment1 +while(a) # dangles {} { - # comment2 + # dangles {} 2 } ``` @@ -70,8 +111,6 @@ Skip: None ----- ```R -while (a) a - while (a) { } @@ -79,8 +118,6 @@ while (a) { 1 + 1 } -# TODO: Not entirely sure how this should be formatted. -# It's not very common though. while ( { complex @@ -95,36 +132,76 @@ while ( 1 + 1 } -while ( - # comment +# ------------------------------------------------------------------------------ +# Autobracing + +while (a) { a -) { - 1 + 1 } -while ( +while (a) { a - # comment -) { - 1 + 1 } -while ( - a # comment -) { - 1 + 1 +# ------------------------------------------------------------------------------ +# Comments + +# leads while +while (a) { + b } +# leads while while (a) { - # comment + b } -while (a) - # comment - 1 +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +# leads while +while (a) { + b +} + +while (a) { + # leads b + b +} + +while (a) { + # leads b + b +} + +while (a) { + # dangles {} +} + +while (a) { + # leads b + b +} while (a) { - # comment1 - # comment2 + # dangles {} + # dangles {} 2 } ``` diff --git a/docs/formatter.qmd b/docs/formatter.qmd index 84828428..916639fd 100644 --- a/docs/formatter.qmd +++ b/docs/formatter.qmd @@ -174,6 +174,298 @@ list(foo, bar) The goal of this feature is to strike a balance between being opinionated and recognizing that users often know when taking up more vertical space results in more readable output. +# Autobracing + +To encourage more consistent, readable, and portable code, Air will *autobrace* the following elements: + +- If statements + +- For, while, and repeat loops + +- Function definitions + +Autobracing is the process of wrapping the body of these code elements with `{ }` if braces don't already exist. + +## If statements + +Air will autobrace if statements if: + +- Any existing part of the if statement spans multiple lines + +- Any existing part of the if statement is already braced + +- The if statement is nested, i.e. there is an `else if {` + +- The if statement exceeds the line length + +For example, the following will all be autobraced: + +``` r +if (condition) + a + +# Becomes: +if (condition) { + a +} +``` + +``` r +if (condition) a else { b } + +# Becomes: +if (condition) { + a +} else { + b +} +``` + +``` r +if (condition) a else if (condition2) b else c + +# Becomes: +if (condition) { + a +} else if (condition2) { + b +} else { + c +} +``` + +Simple if statements that don't hit any of the autobracing criteria mentioned above are allowed to stay on one line as long as they are also in *value* position, as opposed to *effect* position. + +- Top level if statements are in effect position. + +- If statements that are direct children of `{}` are in effect position, unless the if statement is the last child of the `{}` expression list, in which case it is in value position (because it is the returned value from that scope). + +- Otherwise, the if statement is in value position. + +These if statements are in effect position, and would be autobraced: + +``` r +# If statement at top level +if (condition) a else b + +# If statement inside `{}` +fn <- function() { + if (condition) stop("oh no") + 1 + 1 +} + +fn <- function() { + if (condition) return(1) + 1 + 1 +} +``` + +These are in value position, and would not be autobraced: + +``` r +x <- if (condition) 1 else 2 + +x <- x %||% if (condition) 1 else 2 + +list(a = if (condition) 1 else 2) + +function( + a, + optional = if (is.null(a)) 1 else 2 +) { +} + +# If statement is the last expression of the `{}` scope +map(xs, function(x) { + if (is.null(x)) 1 else 2 +}) +``` + +### Portability + +It is particularly important to autobrace multiline if statements for *portability*, which is the ability to copy and paste that if statement into any context and have it still parse. +Consider the following if statement: + +``` r +fn <- function(a) { + if (is.null(a)) + 1 + else + 2 +} +``` + +This parses and runs correctly while the if statement is nested within the `{}` braces of the function. +But if you're testing this code and you copy and paste it out of the function, then it no longer parses: + +``` r +if (is.null(a)) + 1 +else + 2 +``` + +If you try and run this, then you'll see an error like `Error: unexpected 'else'`. +This is particularly annoying when you're working inside a debugger. +Most R debuggers allow you to pause inside functions and highlight and run chunks of that function. +If you're paused inside `fn()` and try to highlight and run the if statement, then it will confusingly fail to parse. +Autobracing multiline if statements avoids this problem entirely. + +## For, while, and repeat loops + +Air unconditionally autobraces the body of all R loops. + +``` r +for (i in 1:5) x <- x + i + +# Becomes: +for (i in 1:5) { + x <- x + i +} +``` + +``` r +while (x < 5) x <- x + 1 + +# Becomes: +while (x < 5) { + x <- x + 1 +} +``` + +## Function definitions + +Air will autobrace the body of a function definition if: + +- Any existing part of the function definition spans multiple lines + +- The function definition exceeds the line length + +``` r +fn <- function(a, b) + a + b + +# Becomes: +fn <- function(a, b) { + a + b +} +``` + +``` r +fn <- function( + a, + b +) a + b + +# Becomes +fn <- function( + a, + b +) { + a + b +} +``` + +``` r +fn <- function(a_really_long_variable_name, another_really_long_name) a_really_long_variable_name + another_really_long_name + +# Becomes: +fn <- function( + a_really_long_variable_name, + another_really_long_name +) { + a_really_long_variable_name + another_really_long_name +} +``` + +Short function definitions are allowed on one line and will not be autobraced. +These are all allowed by Air: + +``` r +add_one <- function(x) x + 1 + +map_lgl(xs, function(x) is.logical(x) && length(x) == 1L && !is.na(x)) + +# This includes anonymous functions +map_lgl(xs, \(x) is.list(x) && length(x) == 0L) +``` + +## With persistent line breaks + +Autobracing is particularly useful as a code rewriting tool when combined with persistent line breaks. +Consider: + +``` r +result <- map_lgl(xs, function(x) is.logical(x) && length(x) == 1L && !is.na(x)) +``` + +This may be easier to read if it spanned across multiple lines. +You could manually rework this, or you could let Air help you! +There are two places you could put a persistent line break depending on what your desired final result is: + +``` r +# Adding a line break before `xs` expands the call +result <- map_lgl( + xs, function(x) is.logical(x) && length(x) == 1L && !is.na(x)) + +# Becomes: +result <- map_lgl( + xs, + function(x) is.logical(x) && length(x) == 1L && !is.na(x) +) +``` + +``` r +# Adding a line break before `is.logical(x)` forces autobracing +result <- map_lgl(xs, function(x) + is.logical(x) && length(x) == 1L && !is.na(x)) + +# Becomes: +result <- map_lgl(xs, function(x) { + is.logical(x) && length(x) == 1L && !is.na(x) +}) +``` + +## Comments + +Air generally avoids moving your comments. +However, when Air autobraces code, it may have to adjust them. +This generally works quite well for most code, but is impossible to do perfectly. +It is possible that you will have to adjust the placement of your comments after Air runs. + +For example, leading comments on autobraced elements are generally placed in a way that you'd expect: + +``` r +if (condition) + # My comment + a + +# Becomes: +if (condition) { + # My comment + a +} +``` + +But trailing comments might need manual adjustment: + +``` r +if (condition) + a # My comment + +# Becomes: +if (condition) { + a +} # My comment + +# You may want to adjust it to: +if (condition) { + a # My comment +} +``` + +In general, prefer leading comments over trailing comments for readability and to have the highest chance of Air placing it in the correct location when comment adjustment is required. + # Disabling formatting ## Skip comments