From 9e11645e77a31af933d699acf53c554e37a459cc Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Mon, 20 Jan 2025 22:02:55 -0800 Subject: [PATCH 1/5] use quick_xml - deserialize only what is needed from XML. `length` and `value` fields are not used and are now ignored when parsing XML - read file contents only once when translating byte offset to line and columns --- Cargo.lock | 50 ++------- cpp-linter/Cargo.toml | 2 +- cpp-linter/src/clang_tools/clang_format.rs | 113 ++++++++------------- cpp-linter/src/common_fs/mod.rs | 18 ++-- cpp-linter/src/rest_api/github/mod.rs | 2 - 5 files changed, 59 insertions(+), 126 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index adf2088..ba733f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -366,11 +366,11 @@ dependencies = [ "mockito", "openssl", "openssl-probe", + "quick-xml", "regex", "reqwest", "semver", "serde", - "serde-xml-rs", "serde_json", "tempfile", "tokio", @@ -1598,6 +1598,16 @@ dependencies = [ "syn", ] +[[package]] +name = "quick-xml" +version = "0.37.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.38" @@ -1886,18 +1896,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-xml-rs" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb3aa78ecda1ebc9ec9847d5d3aba7d618823446a049ba2491940506da6e2782" -dependencies = [ - "log", - "serde", - "thiserror", - "xml-rs", -] - [[package]] name = "serde_derive" version = "1.0.217" @@ -2066,26 +2064,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tinystr" version = "0.7.6" @@ -2555,12 +2533,6 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" -[[package]] -name = "xml-rs" -version = "0.8.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5b940ebc25896e71dd073bad2dbaa2abfe97b0a391415e22ad1326d9c54e3c4" - [[package]] name = "yoke" version = "0.7.5" diff --git a/cpp-linter/Cargo.toml b/cpp-linter/Cargo.toml index 4aa3fd8..450a955 100644 --- a/cpp-linter/Cargo.toml +++ b/cpp-linter/Cargo.toml @@ -25,11 +25,11 @@ lenient_semver = "0.4.2" log = { version = "0.4.25", features = ["std"] } openssl = { version = "0.10", features = ["vendored"], optional = true } openssl-probe = { version = "0.1", optional = true } +quick-xml = {version = "0.37.2", features = ["serialize"]} regex = "1.11.1" reqwest = "0.12.12" semver = "1.0.25" serde = { version = "1.0.217", features = ["derive"] } -serde-xml-rs = "0.6.0" serde_json = "1.0.137" tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"]} tokio-macros = "2.4.0" diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index ba0b1d9..5cef982 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -2,15 +2,14 @@ //! output. use std::{ + fs, process::Command, sync::{Arc, Mutex, MutexGuard}, }; use anyhow::{Context, Result}; use log::Level; -// non-std crates use serde::Deserialize; -use serde_xml_rs::de::Deserializer; // project-specific crates/modules use super::MakeSuggestions; @@ -19,12 +18,10 @@ use crate::{ common_fs::{get_line_cols_from_offset, FileObj}, }; -/// A Structure used to deserialize clang-format's XML output. -#[derive(Debug, Deserialize, PartialEq, Clone)] -#[serde(rename = "replacements")] +#[derive(Debug, Clone, Deserialize)] pub struct FormatAdvice { /// A list of [`Replacement`]s that clang-tidy wants to make. - #[serde(rename = "$value")] + #[serde(rename(deserialize = "replacement"))] pub replacements: Vec, pub patched: Option>, @@ -41,18 +38,12 @@ impl MakeSuggestions for FormatAdvice { } /// A single replacement that clang-format wants to make. -#[derive(Debug, Deserialize, PartialEq)] +#[derive(Debug, PartialEq, Default, Clone, Copy, Deserialize)] pub struct Replacement { /// The byte offset where the replacement will start. + #[serde(rename = "@offset")] pub offset: usize, - /// The amount of bytes that will be removed. - pub length: usize, - - /// The bytes (UTF-8 encoded) that will be added at the [`Replacement::offset`] position. - #[serde(rename = "$value")] - pub value: Option, - /// The line number described by the [`Replacement::offset`]. /// /// This value is not provided by the XML output, but we calculate it after @@ -68,18 +59,6 @@ pub struct Replacement { pub cols: usize, } -impl Clone for Replacement { - fn clone(&self) -> Self { - Replacement { - offset: self.offset, - length: self.length, - value: self.value.clone(), - line: self.line, - cols: self.cols, - } - } -} - /// Get a string that summarizes the given `--style` pub fn summarize_style(style: &str) -> String { if ["google", "chromium", "microsoft", "mozilla", "webkit"].contains(&style) { @@ -173,38 +152,37 @@ pub fn run_clang_format( if output.stdout.is_empty() { return Ok(logs); } - let xml = String::from_utf8(output.stdout) - .with_context(|| format!("stdout from clang-format was not UTF-8 encoded: {file_name}"))? - .lines() - .collect::>() - .join(""); - let config = serde_xml_rs::ParserConfig::new() - .trim_whitespace(false) - .whitespace_to_characters(true) - .ignore_root_level_whitespace(true); - let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config); - let mut format_advice = FormatAdvice::deserialize(&mut Deserializer::new(event_reader)) - .unwrap_or(FormatAdvice { - replacements: vec![], - patched: None, - }); + let xml = String::from_utf8(output.stdout).with_context(|| { + format!("XML output from clang-format was not UTF-8 encoded: {file_name}") + })?; + let mut format_advice = quick_xml::de::from_str::(&xml).unwrap_or(FormatAdvice { + replacements: vec![], + patched: None, + }); format_advice.patched = patched; if !format_advice.replacements.is_empty() { + let original_contents = fs::read(&file.name).with_context(|| { + format!( + "Failed to cache file's original content before applying clang-tidy changes: {}", + file_name.clone() + ) + })?; // get line and column numbers from format_advice.offset let mut filtered_replacements = Vec::new(); for replacement in &mut format_advice.replacements { - let (line_number, columns) = get_line_cols_from_offset(&file.name, replacement.offset); + let (line_number, columns) = + get_line_cols_from_offset(&original_contents, replacement.offset); replacement.line = line_number; replacement.cols = columns; for range in &ranges { if range.contains(&line_number.try_into().unwrap_or(0)) { - filtered_replacements.push(replacement.clone()); + filtered_replacements.push(*replacement); break; } } if ranges.is_empty() { // lines_changed_only is disabled - filtered_replacements.push(replacement.clone()); + filtered_replacements.push(*replacement); } } format_advice.replacements = filtered_replacements; @@ -216,7 +194,6 @@ pub fn run_clang_format( #[cfg(test)] mod tests { use super::{summarize_style, FormatAdvice, Replacement}; - use serde::Deserialize; #[test] fn parse_xml() { @@ -226,52 +203,42 @@ mod tests { -"#; - //since whitespace is part of the elements' body, we need to remove the LFs first - let xml = xml_raw.lines().collect::>().join(""); +"# + .as_bytes() + .to_vec(); let expected = FormatAdvice { replacements: vec![ Replacement { offset: 113, - length: 5, - value: Some(String::from("\n ")), - line: 0, - cols: 0, + ..Default::default() }, Replacement { offset: 147, - length: 0, - value: Some(String::from(" ")), - line: 0, - cols: 0, + ..Default::default() }, Replacement { offset: 161, - length: 0, - value: None, - line: 0, - cols: 0, + ..Default::default() }, Replacement { offset: 165, - length: 19, - value: Some(String::from("\n\n")), - line: 0, - cols: 0, + ..Default::default() }, ], patched: None, }; - let config = serde_xml_rs::ParserConfig::new() - .trim_whitespace(false) - .whitespace_to_characters(true) - .ignore_root_level_whitespace(true); - let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config); - let document = - FormatAdvice::deserialize(&mut serde_xml_rs::de::Deserializer::new(event_reader)) - .unwrap(); - assert_eq!(expected, document); + + let xml = String::from_utf8(xml_raw).unwrap(); + + let document = quick_xml::de::from_str::(&xml).unwrap(); + assert_eq!(expected.replacements.len(), document.replacements.len()); + for i in 0..expected.replacements.len() { + assert_eq!( + expected.replacements[i].offset, + document.replacements[i].offset + ); + } } fn formalize_style(style: &str, expected: &str) { diff --git a/cpp-linter/src/common_fs/mod.rs b/cpp-linter/src/common_fs/mod.rs index 00bf62b..15a5884 100644 --- a/cpp-linter/src/common_fs/mod.rs +++ b/cpp-linter/src/common_fs/mod.rs @@ -2,7 +2,6 @@ use std::fmt::Debug; use std::fs; -use std::io::Read; use std::path::{Component, Path}; use std::{ops::RangeInclusive, path::PathBuf}; @@ -228,15 +227,11 @@ impl FileObj { /// boundary exists at the returned column number. However, the `offset` given to this /// function is expected to originate from diagnostic information provided by /// clang-format or clang-tidy. -pub fn get_line_cols_from_offset(file_path: &PathBuf, offset: usize) -> (usize, usize) { - let mut file_buf = vec![0; offset]; - fs::File::open(file_path) - .unwrap() - .read_exact(&mut file_buf) - .unwrap(); - let lines = file_buf.split(|byte| byte == &b'\n'); +pub fn get_line_cols_from_offset(contents: &[u8], offset: usize) -> (usize, usize) { + let lines = contents[0..offset].split(|byte| byte == &b'\n'); let line_count = lines.clone().count(); - let column_count = lines.last().unwrap_or(&[]).len() + 1; // +1 because not a 0 based count + // here we `cols.len() + 1` because columns is not a 0-based count + let column_count = lines.last().map(|cols| cols.len() + 1).unwrap_or(1); (line_count, column_count) } @@ -272,8 +267,8 @@ pub fn normalize_path(path: &Path) -> PathBuf { #[cfg(test)] mod test { - use std::env::current_dir; use std::path::PathBuf; + use std::{env::current_dir, fs}; use super::{get_line_cols_from_offset, normalize_path, FileObj}; use crate::cli::LinesChangedOnly; @@ -317,7 +312,8 @@ mod test { #[test] fn translate_byte_offset() { - let (lines, cols) = get_line_cols_from_offset(&PathBuf::from("tests/demo/demo.cpp"), 144); + let contents = fs::read(PathBuf::from("tests/demo/demo.cpp")).unwrap(); + let (lines, cols) = get_line_cols_from_offset(&contents, 144); println!("lines: {lines}, cols: {cols}"); assert_eq!(lines, 13); assert_eq!(cols, 5); diff --git a/cpp-linter/src/rest_api/github/mod.rs b/cpp-linter/src/rest_api/github/mod.rs index 05b5a61..f0ddd07 100644 --- a/cpp-linter/src/rest_api/github/mod.rs +++ b/cpp-linter/src/rest_api/github/mod.rs @@ -316,8 +316,6 @@ mod test { }); let replacements = vec![Replacement { offset: 0, - length: 0, - value: Some(String::new()), line: 1, cols: 1, }]; From e5261f02a0c8b1eb0bcd9862bbfd6dcb3a6281aa Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Mon, 20 Jan 2025 22:33:42 -0800 Subject: [PATCH 2/5] self review --- cpp-linter/src/clang_tools/clang_format.rs | 46 ++++++------------- cpp-linter/src/common_fs/mod.rs | 8 ++-- .../src/rest_api/github/specific_api.rs | 2 +- 3 files changed, 19 insertions(+), 37 deletions(-) diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index 5cef982..2b81ef5 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -18,7 +18,7 @@ use crate::{ common_fs::{get_line_cols_from_offset, FileObj}, }; -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Deserialize, PartialEq, Eq)] pub struct FormatAdvice { /// A list of [`Replacement`]s that clang-tidy wants to make. #[serde(rename(deserialize = "replacement"))] @@ -38,25 +38,25 @@ impl MakeSuggestions for FormatAdvice { } /// A single replacement that clang-format wants to make. -#[derive(Debug, PartialEq, Default, Clone, Copy, Deserialize)] +#[derive(Debug, PartialEq, Eq, Default, Clone, Copy, Deserialize)] pub struct Replacement { /// The byte offset where the replacement will start. #[serde(rename = "@offset")] - pub offset: usize, + pub offset: u32, /// The line number described by the [`Replacement::offset`]. /// /// This value is not provided by the XML output, but we calculate it after /// deserialization. #[serde(default)] - pub line: usize, + pub line: u32, /// The column number on the line described by the [`Replacement::offset`]. /// /// This value is not provided by the XML output, but we calculate it after /// deserialization. #[serde(default)] - pub cols: usize, + pub cols: u32, } /// Get a string that summarizes the given `--style` @@ -163,8 +163,7 @@ pub fn run_clang_format( if !format_advice.replacements.is_empty() { let original_contents = fs::read(&file.name).with_context(|| { format!( - "Failed to cache file's original content before applying clang-tidy changes: {}", - file_name.clone() + "Failed to read file's original content before translating byte offsets: {file_name}", ) })?; // get line and column numbers from format_advice.offset @@ -175,7 +174,7 @@ pub fn run_clang_format( replacement.line = line_number; replacement.cols = columns; for range in &ranges { - if range.contains(&line_number.try_into().unwrap_or(0)) { + if range.contains(&line_number) { filtered_replacements.push(*replacement); break; } @@ -208,37 +207,20 @@ mod tests { .to_vec(); let expected = FormatAdvice { - replacements: vec![ - Replacement { - offset: 113, + replacements: [113, 147, 161, 165] + .iter() + .map(|offset| Replacement { + offset: *offset, ..Default::default() - }, - Replacement { - offset: 147, - ..Default::default() - }, - Replacement { - offset: 161, - ..Default::default() - }, - Replacement { - offset: 165, - ..Default::default() - }, - ], + }) + .collect(), patched: None, }; let xml = String::from_utf8(xml_raw).unwrap(); let document = quick_xml::de::from_str::(&xml).unwrap(); - assert_eq!(expected.replacements.len(), document.replacements.len()); - for i in 0..expected.replacements.len() { - assert_eq!( - expected.replacements[i].offset, - document.replacements[i].offset - ); - } + assert_eq!(expected, document); } fn formalize_style(style: &str, expected: &str) { diff --git a/cpp-linter/src/common_fs/mod.rs b/cpp-linter/src/common_fs/mod.rs index 15a5884..83db23e 100644 --- a/cpp-linter/src/common_fs/mod.rs +++ b/cpp-linter/src/common_fs/mod.rs @@ -227,11 +227,11 @@ impl FileObj { /// boundary exists at the returned column number. However, the `offset` given to this /// function is expected to originate from diagnostic information provided by /// clang-format or clang-tidy. -pub fn get_line_cols_from_offset(contents: &[u8], offset: usize) -> (usize, usize) { - let lines = contents[0..offset].split(|byte| byte == &b'\n'); - let line_count = lines.clone().count(); +pub fn get_line_cols_from_offset(contents: &[u8], offset: u32) -> (u32, u32) { + let lines = contents[0..offset as usize].split(|byte| byte == &b'\n'); + let line_count = lines.clone().count() as u32; // here we `cols.len() + 1` because columns is not a 0-based count - let column_count = lines.last().map(|cols| cols.len() + 1).unwrap_or(1); + let column_count = lines.last().map(|cols| cols.len() + 1).unwrap_or(1) as u32; (line_count, column_count) } diff --git a/cpp-linter/src/rest_api/github/specific_api.rs b/cpp-linter/src/rest_api/github/specific_api.rs index aa86d5f..a9f789e 100644 --- a/cpp-linter/src/rest_api/github/specific_api.rs +++ b/cpp-linter/src/rest_api/github/specific_api.rs @@ -169,7 +169,7 @@ impl GithubApiClient { let file = file.lock().unwrap(); if let Some(format_advice) = &file.format_advice { // assemble a list of line numbers - let mut lines: Vec = Vec::new(); + let mut lines = Vec::new(); for replacement in &format_advice.replacements { if !lines.contains(&replacement.line) { lines.push(replacement.line); From d5cc0ca8b8fb43b44034e6166c51feffe4d57a9c Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Tue, 21 Jan 2025 02:20:07 -0800 Subject: [PATCH 3/5] remove column count from Replacement struct Also ensure given `offset` is not out of bounds when calculating line number from byte offset. --- cpp-linter/src/clang_tools/clang_format.rs | 13 ++----------- cpp-linter/src/common_fs/mod.rs | 17 +++++++---------- cpp-linter/src/rest_api/github/mod.rs | 7 +------ 3 files changed, 10 insertions(+), 27 deletions(-) diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index 2b81ef5..8f7c4b1 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -15,7 +15,7 @@ use serde::Deserialize; use super::MakeSuggestions; use crate::{ cli::ClangParams, - common_fs::{get_line_cols_from_offset, FileObj}, + common_fs::{get_line_count_from_offset, FileObj}, }; #[derive(Debug, Clone, Deserialize, PartialEq, Eq)] @@ -50,13 +50,6 @@ pub struct Replacement { /// deserialization. #[serde(default)] pub line: u32, - - /// The column number on the line described by the [`Replacement::offset`]. - /// - /// This value is not provided by the XML output, but we calculate it after - /// deserialization. - #[serde(default)] - pub cols: u32, } /// Get a string that summarizes the given `--style` @@ -169,10 +162,8 @@ pub fn run_clang_format( // get line and column numbers from format_advice.offset let mut filtered_replacements = Vec::new(); for replacement in &mut format_advice.replacements { - let (line_number, columns) = - get_line_cols_from_offset(&original_contents, replacement.offset); + let line_number = get_line_count_from_offset(&original_contents, replacement.offset); replacement.line = line_number; - replacement.cols = columns; for range in &ranges { if range.contains(&line_number) { filtered_replacements.push(*replacement); diff --git a/cpp-linter/src/common_fs/mod.rs b/cpp-linter/src/common_fs/mod.rs index 83db23e..4af1a8d 100644 --- a/cpp-linter/src/common_fs/mod.rs +++ b/cpp-linter/src/common_fs/mod.rs @@ -227,12 +227,11 @@ impl FileObj { /// boundary exists at the returned column number. However, the `offset` given to this /// function is expected to originate from diagnostic information provided by /// clang-format or clang-tidy. -pub fn get_line_cols_from_offset(contents: &[u8], offset: u32) -> (u32, u32) { - let lines = contents[0..offset as usize].split(|byte| byte == &b'\n'); - let line_count = lines.clone().count() as u32; - // here we `cols.len() + 1` because columns is not a 0-based count - let column_count = lines.last().map(|cols| cols.len() + 1).unwrap_or(1) as u32; - (line_count, column_count) +pub fn get_line_count_from_offset(contents: &[u8], offset: u32) -> u32 { + let offset = (offset as usize).min(contents.len()); + let lines = contents[0..offset].split(|byte| byte == &b'\n'); + + lines.count() as u32 } /// This was copied from [cargo source code](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61). @@ -270,7 +269,7 @@ mod test { use std::path::PathBuf; use std::{env::current_dir, fs}; - use super::{get_line_cols_from_offset, normalize_path, FileObj}; + use super::{get_line_count_from_offset, normalize_path, FileObj}; use crate::cli::LinesChangedOnly; // *********************** tests for normalized paths @@ -313,10 +312,8 @@ mod test { #[test] fn translate_byte_offset() { let contents = fs::read(PathBuf::from("tests/demo/demo.cpp")).unwrap(); - let (lines, cols) = get_line_cols_from_offset(&contents, 144); - println!("lines: {lines}, cols: {cols}"); + let lines = get_line_count_from_offset(&contents, 144); assert_eq!(lines, 13); - assert_eq!(cols, 5); } // *********************** tests for FileObj::get_ranges() diff --git a/cpp-linter/src/rest_api/github/mod.rs b/cpp-linter/src/rest_api/github/mod.rs index f0ddd07..f44a588 100644 --- a/cpp-linter/src/rest_api/github/mod.rs +++ b/cpp-linter/src/rest_api/github/mod.rs @@ -314,13 +314,8 @@ mod test { notes, patched: None, }); - let replacements = vec![Replacement { - offset: 0, - line: 1, - cols: 1, - }]; file.format_advice = Some(FormatAdvice { - replacements, + replacements: vec![Replacement { offset: 0, line: 1 }], patched: None, }); files.push(Arc::new(Mutex::new(file))); From 39d54654b050450f4eb576d8b2237cc745c1212c Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Tue, 21 Jan 2025 07:27:06 -0800 Subject: [PATCH 4/5] improve error handling when parsing XML --- cpp-linter/src/clang_tools/clang_format.rs | 41 ++++++++++++++-------- cpp-linter/src/clang_tools/clang_tidy.rs | 11 +++--- cpp-linter/src/clang_tools/mod.rs | 28 +++++++-------- cpp-linter/src/common_fs/file_filter.rs | 2 +- 4 files changed, 45 insertions(+), 37 deletions(-) diff --git a/cpp-linter/src/clang_tools/clang_format.rs b/cpp-linter/src/clang_tools/clang_format.rs index 8f7c4b1..40689d6 100644 --- a/cpp-linter/src/clang_tools/clang_format.rs +++ b/cpp-linter/src/clang_tools/clang_format.rs @@ -94,8 +94,9 @@ pub fn run_clang_format( } let file_name = file.name.to_string_lossy().to_string(); cmd.arg(file.name.to_path_buf().as_os_str()); - let mut patched = None; - if clang_params.format_review { + let patched = if !clang_params.format_review { + None + } else { logs.push(( Level::Info, format!( @@ -112,12 +113,12 @@ pub fn run_clang_format( .join(" ") ), )); - patched = Some( + Some( cmd.output() .with_context(|| format!("Failed to get fixes from clang-format: {file_name}"))? .stdout, - ); - } + ) + }; cmd.arg("--output-replacements-xml"); logs.push(( log::Level::Info, @@ -142,16 +143,19 @@ pub fn run_clang_format( ), )); } - if output.stdout.is_empty() { - return Ok(logs); - } - let xml = String::from_utf8(output.stdout).with_context(|| { - format!("XML output from clang-format was not UTF-8 encoded: {file_name}") - })?; - let mut format_advice = quick_xml::de::from_str::(&xml).unwrap_or(FormatAdvice { - replacements: vec![], - patched: None, - }); + let mut format_advice = if !output.stdout.is_empty() { + let xml = String::from_utf8(output.stdout).with_context(|| { + format!("XML output from clang-format was not UTF-8 encoded: {file_name}") + })?; + quick_xml::de::from_str::(&xml).with_context(|| { + format!("Failed to parse XML output from clang-format for {file_name}") + })? + } else { + FormatAdvice { + replacements: vec![], + patched: None, + } + }; format_advice.patched = patched; if !format_advice.replacements.is_empty() { let original_contents = fs::read(&file.name).with_context(|| { @@ -185,6 +189,13 @@ pub fn run_clang_format( mod tests { use super::{summarize_style, FormatAdvice, Replacement}; + #[test] + fn parse_blank_xml() { + let xml = String::new(); + let result = quick_xml::de::from_str::(&xml); + assert!(result.is_err()); + } + #[test] fn parse_xml() { let xml_raw = r#" diff --git a/cpp-linter/src/clang_tools/clang_tidy.rs b/cpp-linter/src/clang_tools/clang_tidy.rs index fd39b75..0d364af 100644 --- a/cpp-linter/src/clang_tools/clang_tidy.rs +++ b/cpp-linter/src/clang_tools/clang_tidy.rs @@ -276,16 +276,17 @@ pub fn run_clang_tidy( cmd.args(["--line-filter", filter.as_str()]); } } - let mut original_content = None; - if clang_params.tidy_review { + let original_content = if !clang_params.tidy_review { + None + } else { cmd.arg("--fix-errors"); - original_content = Some(fs::read_to_string(&file.name).with_context(|| { + Some(fs::read_to_string(&file.name).with_context(|| { format!( "Failed to cache file's original content before applying clang-tidy changes: {}", file_name.clone() ) - })?); - } + })?) + }; if !clang_params.style.is_empty() { cmd.args(["--format-style", clang_params.style.as_str()]); } diff --git a/cpp-linter/src/clang_tools/mod.rs b/cpp-linter/src/clang_tools/mod.rs index 29d813c..f89078e 100644 --- a/cpp-linter/src/clang_tools/mod.rs +++ b/cpp-linter/src/clang_tools/mod.rs @@ -258,20 +258,18 @@ pub struct ReviewComments { impl ReviewComments { pub fn summarize(&self, clang_versions: &ClangVersions) -> String { let mut body = format!("{COMMENT_MARKER}## Cpp-linter Review\n"); - for t in 0u8..=1 { + for t in 0_usize..=1 { let mut total = 0; let (tool_name, tool_version) = if t == 0 { ("clang-format", clang_versions.format_version.as_ref()) } else { ("clang-tidy", clang_versions.tidy_version.as_ref()) }; - - let tool_total = if let Some(total) = self.tool_total[t as usize] { - total - } else { - // review was not requested from this tool or the tool was not used at all + if tool_version.is_none() { + // this tool was not used at all continue; - }; + } + let tool_total = self.tool_total[t].unwrap_or_default(); // If the tool's version is unknown, then we don't need to output this line. // NOTE: If the tool was invoked at all, then the tool's version shall be known. @@ -295,11 +293,11 @@ impl ReviewComments { .as_str(), ); } - if !self.full_patch[t as usize].is_empty() { + if !self.full_patch[t].is_empty() { body.push_str( format!( "\n
Click here for the full {tool_name} patch\n\n```diff\n{}```\n\n
\n", - self.full_patch[t as usize] + self.full_patch[t] ).as_str() ); } else { @@ -370,8 +368,7 @@ pub trait MakeSuggestions { patch: &mut Patch, summary_only: bool, ) -> Result<()> { - let tool_name = self.get_tool_name(); - let is_tidy_tool = tool_name == "clang-tidy"; + let is_tidy_tool = (&self.get_tool_name() == "clang-tidy") as usize; let hunks_total = patch.num_hunks(); let mut hunks_in_patch = 0u32; let file_name = file_obj @@ -384,13 +381,13 @@ pub trait MakeSuggestions { .to_buf() .with_context(|| "Failed to convert patch to byte array")? .to_vec(); - review_comments.full_patch[is_tidy_tool as usize].push_str( + review_comments.full_patch[is_tidy_tool].push_str( String::from_utf8(patch_buf.to_owned()) .with_context(|| format!("Failed to convert patch to string: {file_name}"))? .as_str(), ); - review_comments.tool_total[is_tidy_tool as usize].get_or_insert(0); if summary_only { + review_comments.tool_total[is_tidy_tool].get_or_insert(0); return Ok(()); } for hunk_id in 0..hunks_total { @@ -447,9 +444,8 @@ pub trait MakeSuggestions { review_comments.comments.push(comment); } } - review_comments.tool_total[is_tidy_tool as usize] = Some( - review_comments.tool_total[is_tidy_tool as usize].unwrap_or_default() + hunks_in_patch, - ); + review_comments.tool_total[is_tidy_tool] = + Some(review_comments.tool_total[is_tidy_tool].unwrap_or_default() + hunks_in_patch); Ok(()) } } diff --git a/cpp-linter/src/common_fs/file_filter.rs b/cpp-linter/src/common_fs/file_filter.rs index a8ba786..82697e0 100644 --- a/cpp-linter/src/common_fs/file_filter.rs +++ b/cpp-linter/src/common_fs/file_filter.rs @@ -111,7 +111,7 @@ impl FileFilter { || (pat.is_dir() && file_name.starts_with(pat)) { log::debug!( - "file {file_name:?} is in {}ignored with domain {pattern:?}.", + "file {file_name:?} is {}ignored with domain {pattern:?}.", if is_ignored { "" } else { "not " } ); return true; From cc494ec2e76a1e8a07cbfe9a7182356463c683f7 Mon Sep 17 00:00:00 2001 From: Brendan <2bndy5@gmail.com> Date: Tue, 21 Jan 2025 15:13:09 -0800 Subject: [PATCH 5/5] update function's doc comment --- cpp-linter/src/common_fs/mod.rs | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/cpp-linter/src/common_fs/mod.rs b/cpp-linter/src/common_fs/mod.rs index 4af1a8d..c07854b 100644 --- a/cpp-linter/src/common_fs/mod.rs +++ b/cpp-linter/src/common_fs/mod.rs @@ -219,18 +219,15 @@ impl FileObj { } } -/// Gets the line and column number from a given `offset` (of bytes) for given -/// `file_path`. +/// Gets the line number for a given `offset` (of bytes) from the given +/// buffer `contents`. /// -/// This computes the line and column numbers from a buffer of bytes read from the -/// `file_path`. In non-UTF-8 encoded files, this does not guarantee that a word -/// boundary exists at the returned column number. However, the `offset` given to this -/// function is expected to originate from diagnostic information provided by -/// clang-format or clang-tidy. +/// The `offset` given to this function is expected to originate from +/// diagnostic information provided by clang-format. Any `offset` out of +/// bounds is clamped to the given `contents` buffer's length. pub fn get_line_count_from_offset(contents: &[u8], offset: u32) -> u32 { let offset = (offset as usize).min(contents.len()); let lines = contents[0..offset].split(|byte| byte == &b'\n'); - lines.count() as u32 } @@ -316,6 +313,20 @@ mod test { assert_eq!(lines, 13); } + #[test] + fn get_line_count_edge_cases() { + // Empty content + assert_eq!(get_line_count_from_offset(&[], 0), 1); + + // No newlines + assert_eq!(get_line_count_from_offset(b"abc", 3), 1); + + // Consecutive newlines + assert_eq!(get_line_count_from_offset(b"a\n\nb", 3), 3); + + // Offset beyond content length + assert_eq!(get_line_count_from_offset(b"a\nb\n", 10), 3); + } // *********************** tests for FileObj::get_ranges() #[test]