From d6bc850450aa4df4fb77567601695e829bb6ee51 Mon Sep 17 00:00:00 2001 From: Gengar Date: Tue, 2 Dec 2025 17:54:20 +0200 Subject: [PATCH 1/6] Refactor comment extraction logic in db.rs --- crates/cairo-lang-doc/src/db.rs | 85 ++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 28 deletions(-) diff --git a/crates/cairo-lang-doc/src/db.rs b/crates/cairo-lang-doc/src/db.rs index 07dd2b85b71..8f5b6201af2 100644 --- a/crates/cairo-lang-doc/src/db.rs +++ b/crates/cairo-lang-doc/src/db.rs @@ -154,16 +154,18 @@ fn extract_item_outer_documentation<'db>( ) -> Option { // Get the text of the item (trivia + definition) let raw_text = item_id.stable_location(db)?.syntax_node(db).get_text(db); - Some( - raw_text + let comment_lines: Vec<&str> = raw_text .lines() .filter(|line| !line.trim().is_empty()) // Takes all the lines before the definition. // Anything other than doc comments will be filtered out later. .take_while_ref(|line| is_comment_line(line) || line.trim_start().starts_with("#")) - .filter_map(|line| extract_comment_from_code_line(line, &["///"])) - .join("\n"), - ) + .filter_map(|line| extract_comment_line_content(line, &["///"])) + .collect(); + if comment_lines.is_empty() { + return None; + } + Some(dedent_comment_block(&comment_lines)) } /// Gets the module level comments of the item. @@ -187,12 +189,16 @@ fn extract_item_module_level_documentation<'db>( /// Only gets the comments inside the item. fn extract_item_inner_documentation_from_raw_text(raw_text: String) -> String { - raw_text + let comment_lines: Vec<&str> = raw_text .lines() .filter(|line| !line.trim().is_empty()) .skip_while(|line| is_comment_line(line)) - .filter_map(|line| extract_comment_from_code_line(line, &["//!"])) - .join("\n") + .filter_map(|line| extract_comment_line_content(line, &["//!"])) + .collect(); + if comment_lines.is_empty() { + return String::new(); + } + dedent_comment_block(&comment_lines) } /// Gets the module level comments of certain file. @@ -201,41 +207,64 @@ fn extract_item_module_level_documentation_from_file<'db>( file_id: FileId<'db>, ) -> Option { let file_content = db.file_content(file_id)?.to_string(); - Some( - file_content - .lines() - .filter(|line| !line.trim().is_empty()) - .take_while_ref(|line| is_comment_line(line)) - .filter_map(|line| extract_comment_from_code_line(line, &["//!"])) - .join("\n"), - ) + let comment_lines: Vec<&str> = file_content + .lines() + .filter(|line| !line.trim().is_empty()) + .take_while_ref(|line| is_comment_line(line)) + .filter_map(|line| extract_comment_line_content(line, &["//!"])) + .collect(); + if comment_lines.is_empty() { + return None; + } + Some(dedent_comment_block(&comment_lines)) } -/// This function does 3 things to the line of comment: -/// 1. Removes indentation -/// 2. If it starts with one of the passed prefixes, removes the given prefixes (including the space -/// after the prefix). -/// 3. If the comment starts with a slash, returns None. -fn extract_comment_from_code_line(line: &str, comment_markers: &[&'static str]) -> Option { +/// Extracts the content from a comment line (without the marker and leading indentation). +/// Returns None if the line is not a doc comment or starts with a slash after the marker. +fn extract_comment_line_content<'a>( + line: &'a str, + comment_markers: &[&'static str], +) -> Option<&'a str> { // Remove indentation. let dedent = line.trim_start(); // Check if this is a doc comment. for comment_marker in comment_markers { if let Some(content) = dedent.strip_prefix(*comment_marker) { - // TODO(mkaput): The way how removing this indentation is performed is probably - // wrong. The code should probably learn how many spaces are used at the first - // line of comments block, and then remove the same amount of spaces in the - // block, instead of assuming just one space. - // Remove inner indentation if one exists. + // Skip lines that start with a slash (like /// or //!). if content.starts_with('/') { return None; } - return Some(content.strip_prefix(' ').unwrap_or(content).to_string()); + return Some(content); } } None } +/// Removes the common leading indentation from a block of comment lines. +/// This function finds the minimum indentation (number of spaces after the comment marker) +/// across all non-empty lines and removes that amount from each line. +fn dedent_comment_block(lines: &[&str]) -> String { + if lines.is_empty() { + return String::new(); + } + + // Find the minimum indentation (number of leading spaces) across all lines. + let min_indent = lines + .iter() + .filter_map(|line| { + let trimmed = line.trim(); + if trimmed.is_empty() { None } else { Some(line.len() - line.trim_start().len()) } + }) + .min() + .unwrap_or(0); + + // Remove the minimum indentation from each line. + lines + .iter() + .map(|line| if line.len() >= min_indent { &line[min_indent..] } else { line }) + .join("\n") +} + /// Check whether the code line is a comment line. fn is_comment_line(line: &str) -> bool { line.trim_start().starts_with("//") From 6a5b83c054d42241bc32ec977e1d39e12ecac6cd Mon Sep 17 00:00:00 2001 From: Gengar Date: Sun, 7 Dec 2025 12:58:58 +0200 Subject: [PATCH 2/6] Add indentation test data for documentation --- .../src/tests/test-data/indentation.txt | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 crates/cairo-lang-doc/src/tests/test-data/indentation.txt diff --git a/crates/cairo-lang-doc/src/tests/test-data/indentation.txt b/crates/cairo-lang-doc/src/tests/test-data/indentation.txt new file mode 100644 index 00000000000..6df936450df --- /dev/null +++ b/crates/cairo-lang-doc/src/tests/test-data/indentation.txt @@ -0,0 +1,91 @@ +//! > Documentation + +//! > test_runner_name +documentation_test_runner + +//! > cairo_project.toml +[crate_roots] +hello = "src" + +//! > cairo_code +/// Function with multi-line doc comment. +/// This line has extra indentation. +/// This line has less indentation. +/// Another indented line. +fn test_function() {} + +/// Another function with varying indentation. +/// First line with some indentation. +/// Second line with more indentation. +/// Third line with no extra indentation. +/// Fourth line with some indentation. +fn another_function() {} + +/// Function demonstrating that minimum indentation is removed uniformly. +/// All lines have at least 2 spaces. +/// Some lines have 4 spaces. +/// But minimum (2 spaces) is removed from all. +fn indented_function() {} + +//! > Item signature #1 + +//! > Item documentation #1 + +//! > Item documentation tokens #1 + +//! > Item signature #2 +fn test_function() + +//! > Item documentation #2 +Function with multi-line doc comment. +This line has extra indentation. +This line has less indentation. +Another indented line. + +//! > Item documentation tokens #2 +Content("Function with multi-line doc comment.") +Content("\n") +Content("This line has extra indentation.") +Content("\n") +Content("This line has less indentation.") +Content("\n") +Content("Another indented line.") + +//! > Item signature #3 +fn another_function() + +//! > Item documentation #3 +Another function with varying indentation. +First line with some indentation. +Second line with more indentation. +Third line with no extra indentation. +Fourth line with some indentation. + +//! > Item documentation tokens #3 +Content("Another function with varying indentation.") +Content("\n") +Content("First line with some indentation.") +Content("\n") +Content("Second line with more indentation.") +Content("\n") +Content("Third line with no extra indentation.") +Content("\n") +Content("Fourth line with some indentation.") + +//! > Item signature #4 +fn indented_function() + +//! > Item documentation #4 +Function demonstrating that minimum indentation is removed uniformly. +All lines have at least 2 spaces. +Some lines have 4 spaces. +But minimum (2 spaces) is removed from all. + +//! > Item documentation tokens #4 +Content("Function demonstrating that minimum indentation is removed uniformly.") +Content("\n") +Content("All lines have at least 2 spaces.") +Content("\n") +Content("Some lines have 4 spaces.") +Content("\n") +Content("But minimum (2 spaces) is removed from all.") From 07d0be08ce11a8f2b277cb243be5a16823217dc3 Mon Sep 17 00:00:00 2001 From: Gengar Date: Sun, 7 Dec 2025 12:59:55 +0200 Subject: [PATCH 3/6] Add indentation formatting to test configuration --- crates/cairo-lang-doc/src/tests/test.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/cairo-lang-doc/src/tests/test.rs b/crates/cairo-lang-doc/src/tests/test.rs index d10ceba857f..6f6a63d4e0a 100644 --- a/crates/cairo-lang-doc/src/tests/test.rs +++ b/crates/cairo-lang-doc/src/tests/test.rs @@ -30,6 +30,7 @@ cairo_lang_test_utils::test_file_test!( tables_formatting: "tables_formatting.txt", rules_formatting: "rules_formatting.txt", font_formatting: "font_formatting.txt", + indentation: "indentation.txt", }, documentation_test_runner ); From 21307042c083e80b860b411493e49e29805ee788 Mon Sep 17 00:00:00 2001 From: Gengar Date: Wed, 10 Dec 2025 14:59:01 +0200 Subject: [PATCH 4/6] Fix indentation in documentation comments Updated indentation in documentation comments to ensure consistent formatting and clarity. --- .../src/tests/test-data/indentation.txt | 64 +++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/crates/cairo-lang-doc/src/tests/test-data/indentation.txt b/crates/cairo-lang-doc/src/tests/test-data/indentation.txt index 6df936450df..1daafa43c99 100644 --- a/crates/cairo-lang-doc/src/tests/test-data/indentation.txt +++ b/crates/cairo-lang-doc/src/tests/test-data/indentation.txt @@ -27,6 +27,13 @@ fn another_function() {} /// But minimum (2 spaces) is removed from all. fn indented_function() {} +/// This test case would have failed before the fix. +/// Line with 2 spaces indentation. +/// Line with 4 spaces indentation. +/// Another line with 2 spaces. +/// Line with 6 spaces indentation. +fn test_case_that_failed_before() {} + //! > Item signature #1 //! > Item documentation #1 @@ -38,54 +45,75 @@ fn test_function() //! > Item documentation #2 Function with multi-line doc comment. -This line has extra indentation. -This line has less indentation. -Another indented line. + This line has extra indentation. + This line has less indentation. + Another indented line. //! > Item documentation tokens #2 Content("Function with multi-line doc comment.") Content("\n") -Content("This line has extra indentation.") +Content(" This line has extra indentation.") Content("\n") -Content("This line has less indentation.") +Content(" This line has less indentation.") Content("\n") -Content("Another indented line.") +Content(" Another indented line.") //! > Item signature #3 fn another_function() //! > Item documentation #3 Another function with varying indentation. -First line with some indentation. -Second line with more indentation. + First line with some indentation. + Second line with more indentation. Third line with no extra indentation. -Fourth line with some indentation. + Fourth line with some indentation. //! > Item documentation tokens #3 Content("Another function with varying indentation.") Content("\n") -Content("First line with some indentation.") +Content(" First line with some indentation.") Content("\n") -Content("Second line with more indentation.") +Content(" Second line with more indentation.") Content("\n") Content("Third line with no extra indentation.") Content("\n") -Content("Fourth line with some indentation.") +Content(" Fourth line with some indentation.") //! > Item signature #4 fn indented_function() //! > Item documentation #4 Function demonstrating that minimum indentation is removed uniformly. -All lines have at least 2 spaces. -Some lines have 4 spaces. -But minimum (2 spaces) is removed from all. + All lines have at least 2 spaces. + Some lines have 4 spaces. + But minimum (2 spaces) is removed from all. //! > Item documentation tokens #4 Content("Function demonstrating that minimum indentation is removed uniformly.") Content("\n") -Content("All lines have at least 2 spaces.") +Content(" All lines have at least 2 spaces.") +Content("\n") +Content(" Some lines have 4 spaces.") +Content("\n") +Content(" But minimum (2 spaces) is removed from all.") + +//! > Item signature #5 +fn test_case_that_failed_before() + +//! > Item documentation #5 +This test case would have failed before the fix. + Line with 2 spaces indentation. + Line with 4 spaces indentation. + Another line with 2 spaces. + Line with 6 spaces indentation. + +//! > Item documentation tokens #5 +Content("This test case would have failed before the fix.") +Content("\n") +Content(" Line with 2 spaces indentation.") +Content("\n") +Content(" Line with 4 spaces indentation.") Content("\n") -Content("Some lines have 4 spaces.") +Content(" Another line with 2 spaces.") Content("\n") -Content("But minimum (2 spaces) is removed from all.") +Content(" Line with 6 spaces indentation.") From e33e7dc021beed0c413e836bd2de9190a6de0d6a Mon Sep 17 00:00:00 2001 From: Gengar Date: Wed, 10 Dec 2025 14:59:24 +0200 Subject: [PATCH 5/6] Modify comment extraction to preserve indentation Updated the extract_comment_line_content function to preserve indentation after the comment marker in the extracted content. --- crates/cairo-lang-doc/src/db.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/cairo-lang-doc/src/db.rs b/crates/cairo-lang-doc/src/db.rs index 8f5b6201af2..a303bd175c3 100644 --- a/crates/cairo-lang-doc/src/db.rs +++ b/crates/cairo-lang-doc/src/db.rs @@ -219,13 +219,14 @@ fn extract_item_module_level_documentation_from_file<'db>( Some(dedent_comment_block(&comment_lines)) } -/// Extracts the content from a comment line (without the marker and leading indentation). +/// Extracts the content from a comment line (without the marker, but preserving indentation +/// after the marker). /// Returns None if the line is not a doc comment or starts with a slash after the marker. fn extract_comment_line_content<'a>( line: &'a str, comment_markers: &[&'static str], ) -> Option<&'a str> { - // Remove indentation. + // Remove indentation before the comment marker. let dedent = line.trim_start(); // Check if this is a doc comment. for comment_marker in comment_markers { @@ -234,6 +235,7 @@ fn extract_comment_line_content<'a>( if content.starts_with('/') { return None; } + // Return the content after the marker, preserving any spaces after the marker. return Some(content); } } From cd239da694c6ee48e3a0a5f2838b400c2f8c833f Mon Sep 17 00:00:00 2001 From: Gengar Date: Wed, 10 Dec 2025 14:59:47 +0200 Subject: [PATCH 6/6] Improve indentation handling in documentation comments Added logic to restore indentation for documentation comments by mapping line indices to their leading spaces. Enhanced text processing to handle both exact and partial line matches for improved formatting. --- crates/cairo-lang-doc/src/parser.rs | 90 ++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/crates/cairo-lang-doc/src/parser.rs b/crates/cairo-lang-doc/src/parser.rs index dcb5e6aac1a..75adad5704f 100644 --- a/crates/cairo-lang-doc/src/parser.rs +++ b/crates/cairo-lang-doc/src/parser.rs @@ -83,6 +83,13 @@ impl<'db> DocumentationCommentParser<'db> { item_id: DocumentableItemId<'db>, documentation_comment: String, ) -> Vec> { + // Build a map of line indices to their leading indentation (number of spaces) + // before markdown parsing removes them. + let line_indents: Vec = documentation_comment + .lines() + .map(|line| line.len() - line.trim_start().len()) + .collect(); + let mut tokens = Vec::new(); let mut current_link: Option> = None; let mut is_indented_code_block = false; @@ -139,7 +146,88 @@ impl<'db> DocumentationCommentParser<'db> { if is_indented_code_block { format!(" {text}") } else { - text.to_string() + // Process text line by line to restore indentation + let text_str = text.as_ref(); + let lines: Vec<&str> = text_str.split_inclusive('\n').collect(); + + let mut result = String::new(); + for (line_idx, line) in lines.iter().enumerate() { + let trimmed_line = line.trim(); + + // Check if this is the start of a new line in the original text + let is_new_line = line_idx == 0 + && (tokens.is_empty() + || tokens + .last() + .and_then(|last| { + if let DocumentationCommentToken::Content( + content, + ) = last + { + Some(content.ends_with('\n')) + } else { + None + } + }) + .unwrap_or(true)); + + // For each non-empty line, try to find matching line in + // original text + if !trimmed_line.is_empty() && (is_new_line || line_idx > 0) { + // Find the line in original text that matches this content + // Try exact match first, then partial match + let mut found_line_num = None; + for (i, orig_line) in + documentation_comment.lines().enumerate() + { + let trimmed_orig = orig_line.trim(); + // Exact match (most reliable) + if trimmed_orig == trimmed_line { + found_line_num = Some(i); + break; + } + } + + // If no exact match, try partial match + if found_line_num.is_none() { + for (i, orig_line) in + documentation_comment.lines().enumerate() + { + let trimmed_orig = orig_line.trim(); + // Check if one is a prefix of the other (for cases + // where markdown splits text) + if (trimmed_line.len() >= 5 + && trimmed_orig.starts_with( + &trimmed_line[..trimmed_line + .len() + .min(trimmed_orig.len())], + )) + || (trimmed_orig.len() >= 5 + && trimmed_line.starts_with( + &trimmed_orig[..trimmed_orig + .len() + .min(trimmed_line.len())], + )) + { + found_line_num = Some(i); + break; + } + } + } + + if let Some(line_num) = found_line_num + && line_num < line_indents.len() + { + let indent = line_indents[line_num]; + if indent > 0 { + result.push_str(&" ".repeat(indent)); + } + } + } + result.push_str(line); + } + + if result.is_empty() { text.to_string() } else { result } } }; tokens.push(DocumentationCommentToken::Content(text));