Skip to content

Commit 9d782b5

Browse files
authored
refactor: switch to quick_xml library (#101)
* use `quick_xml` * deserialize only what is needed from XML. The `cols`, `length`, and `value` fields were never used and are now ignored when parsing XML * read file contents only once when translating byte offset to line * improve error handling when parsing XML
1 parent 05f8b95 commit 9d782b5

File tree

9 files changed

+115
-198
lines changed

9 files changed

+115
-198
lines changed

Cargo.lock

+11-39
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cpp-linter/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ lenient_semver = "0.4.2"
2525
log = { version = "0.4.25", features = ["std"] }
2626
openssl = { version = "0.10", features = ["vendored"], optional = true }
2727
openssl-probe = { version = "0.1", optional = true }
28+
quick-xml = {version = "0.37.2", features = ["serialize"]}
2829
regex = "1.11.1"
2930
reqwest = "0.12.12"
3031
semver = "1.0.25"
3132
serde = { version = "1.0.217", features = ["derive"] }
32-
serde-xml-rs = "0.6.0"
3333
serde_json = "1.0.137"
3434
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"]}
3535
tokio-macros = "2.4.0"

cpp-linter/src/clang_tools/clang_format.rs

+55-104
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,26 @@
22
//! output.
33
44
use std::{
5+
fs,
56
process::Command,
67
sync::{Arc, Mutex, MutexGuard},
78
};
89

910
use anyhow::{Context, Result};
1011
use log::Level;
11-
// non-std crates
1212
use serde::Deserialize;
13-
use serde_xml_rs::de::Deserializer;
1413

1514
// project-specific crates/modules
1615
use super::MakeSuggestions;
1716
use crate::{
1817
cli::ClangParams,
19-
common_fs::{get_line_cols_from_offset, FileObj},
18+
common_fs::{get_line_count_from_offset, FileObj},
2019
};
2120

22-
/// A Structure used to deserialize clang-format's XML output.
23-
#[derive(Debug, Deserialize, PartialEq, Clone)]
24-
#[serde(rename = "replacements")]
21+
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
2522
pub struct FormatAdvice {
2623
/// A list of [`Replacement`]s that clang-tidy wants to make.
27-
#[serde(rename = "$value")]
24+
#[serde(rename(deserialize = "replacement"))]
2825
pub replacements: Vec<Replacement>,
2926

3027
pub patched: Option<Vec<u8>>,
@@ -41,43 +38,18 @@ impl MakeSuggestions for FormatAdvice {
4138
}
4239

4340
/// A single replacement that clang-format wants to make.
44-
#[derive(Debug, Deserialize, PartialEq)]
41+
#[derive(Debug, PartialEq, Eq, Default, Clone, Copy, Deserialize)]
4542
pub struct Replacement {
4643
/// The byte offset where the replacement will start.
47-
pub offset: usize,
48-
49-
/// The amount of bytes that will be removed.
50-
pub length: usize,
51-
52-
/// The bytes (UTF-8 encoded) that will be added at the [`Replacement::offset`] position.
53-
#[serde(rename = "$value")]
54-
pub value: Option<String>,
44+
#[serde(rename = "@offset")]
45+
pub offset: u32,
5546

5647
/// The line number described by the [`Replacement::offset`].
5748
///
5849
/// This value is not provided by the XML output, but we calculate it after
5950
/// deserialization.
6051
#[serde(default)]
61-
pub line: usize,
62-
63-
/// The column number on the line described by the [`Replacement::offset`].
64-
///
65-
/// This value is not provided by the XML output, but we calculate it after
66-
/// deserialization.
67-
#[serde(default)]
68-
pub cols: usize,
69-
}
70-
71-
impl Clone for Replacement {
72-
fn clone(&self) -> Self {
73-
Replacement {
74-
offset: self.offset,
75-
length: self.length,
76-
value: self.value.clone(),
77-
line: self.line,
78-
cols: self.cols,
79-
}
80-
}
52+
pub line: u32,
8153
}
8254

8355
/// Get a string that summarizes the given `--style`
@@ -122,8 +94,9 @@ pub fn run_clang_format(
12294
}
12395
let file_name = file.name.to_string_lossy().to_string();
12496
cmd.arg(file.name.to_path_buf().as_os_str());
125-
let mut patched = None;
126-
if clang_params.format_review {
97+
let patched = if !clang_params.format_review {
98+
None
99+
} else {
127100
logs.push((
128101
Level::Info,
129102
format!(
@@ -140,12 +113,12 @@ pub fn run_clang_format(
140113
.join(" ")
141114
),
142115
));
143-
patched = Some(
116+
Some(
144117
cmd.output()
145118
.with_context(|| format!("Failed to get fixes from clang-format: {file_name}"))?
146119
.stdout,
147-
);
148-
}
120+
)
121+
};
149122
cmd.arg("--output-replacements-xml");
150123
logs.push((
151124
log::Level::Info,
@@ -170,41 +143,40 @@ pub fn run_clang_format(
170143
),
171144
));
172145
}
173-
if output.stdout.is_empty() {
174-
return Ok(logs);
175-
}
176-
let xml = String::from_utf8(output.stdout)
177-
.with_context(|| format!("stdout from clang-format was not UTF-8 encoded: {file_name}"))?
178-
.lines()
179-
.collect::<Vec<&str>>()
180-
.join("");
181-
let config = serde_xml_rs::ParserConfig::new()
182-
.trim_whitespace(false)
183-
.whitespace_to_characters(true)
184-
.ignore_root_level_whitespace(true);
185-
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
186-
let mut format_advice = FormatAdvice::deserialize(&mut Deserializer::new(event_reader))
187-
.unwrap_or(FormatAdvice {
146+
let mut format_advice = if !output.stdout.is_empty() {
147+
let xml = String::from_utf8(output.stdout).with_context(|| {
148+
format!("XML output from clang-format was not UTF-8 encoded: {file_name}")
149+
})?;
150+
quick_xml::de::from_str::<FormatAdvice>(&xml).with_context(|| {
151+
format!("Failed to parse XML output from clang-format for {file_name}")
152+
})?
153+
} else {
154+
FormatAdvice {
188155
replacements: vec![],
189156
patched: None,
190-
});
157+
}
158+
};
191159
format_advice.patched = patched;
192160
if !format_advice.replacements.is_empty() {
161+
let original_contents = fs::read(&file.name).with_context(|| {
162+
format!(
163+
"Failed to read file's original content before translating byte offsets: {file_name}",
164+
)
165+
})?;
193166
// get line and column numbers from format_advice.offset
194167
let mut filtered_replacements = Vec::new();
195168
for replacement in &mut format_advice.replacements {
196-
let (line_number, columns) = get_line_cols_from_offset(&file.name, replacement.offset);
169+
let line_number = get_line_count_from_offset(&original_contents, replacement.offset);
197170
replacement.line = line_number;
198-
replacement.cols = columns;
199171
for range in &ranges {
200-
if range.contains(&line_number.try_into().unwrap_or(0)) {
201-
filtered_replacements.push(replacement.clone());
172+
if range.contains(&line_number) {
173+
filtered_replacements.push(*replacement);
202174
break;
203175
}
204176
}
205177
if ranges.is_empty() {
206178
// lines_changed_only is disabled
207-
filtered_replacements.push(replacement.clone());
179+
filtered_replacements.push(*replacement);
208180
}
209181
}
210182
format_advice.replacements = filtered_replacements;
@@ -216,7 +188,13 @@ pub fn run_clang_format(
216188
#[cfg(test)]
217189
mod tests {
218190
use super::{summarize_style, FormatAdvice, Replacement};
219-
use serde::Deserialize;
191+
192+
#[test]
193+
fn parse_blank_xml() {
194+
let xml = String::new();
195+
let result = quick_xml::de::from_str::<FormatAdvice>(&xml);
196+
assert!(result.is_err());
197+
}
220198

221199
#[test]
222200
fn parse_xml() {
@@ -226,51 +204,24 @@ mod tests {
226204
<replacement offset='147' length='0'> </replacement>
227205
<replacement offset='161' length='0'></replacement>
228206
<replacement offset='165' length='19'>&#10;&#10;</replacement>
229-
</replacements>"#;
230-
//since whitespace is part of the elements' body, we need to remove the LFs first
231-
let xml = xml_raw.lines().collect::<Vec<&str>>().join("");
207+
</replacements>"#
208+
.as_bytes()
209+
.to_vec();
232210

233211
let expected = FormatAdvice {
234-
replacements: vec![
235-
Replacement {
236-
offset: 113,
237-
length: 5,
238-
value: Some(String::from("\n ")),
239-
line: 0,
240-
cols: 0,
241-
},
242-
Replacement {
243-
offset: 147,
244-
length: 0,
245-
value: Some(String::from(" ")),
246-
line: 0,
247-
cols: 0,
248-
},
249-
Replacement {
250-
offset: 161,
251-
length: 0,
252-
value: None,
253-
line: 0,
254-
cols: 0,
255-
},
256-
Replacement {
257-
offset: 165,
258-
length: 19,
259-
value: Some(String::from("\n\n")),
260-
line: 0,
261-
cols: 0,
262-
},
263-
],
212+
replacements: [113, 147, 161, 165]
213+
.iter()
214+
.map(|offset| Replacement {
215+
offset: *offset,
216+
..Default::default()
217+
})
218+
.collect(),
264219
patched: None,
265220
};
266-
let config = serde_xml_rs::ParserConfig::new()
267-
.trim_whitespace(false)
268-
.whitespace_to_characters(true)
269-
.ignore_root_level_whitespace(true);
270-
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
271-
let document =
272-
FormatAdvice::deserialize(&mut serde_xml_rs::de::Deserializer::new(event_reader))
273-
.unwrap();
221+
222+
let xml = String::from_utf8(xml_raw).unwrap();
223+
224+
let document = quick_xml::de::from_str::<FormatAdvice>(&xml).unwrap();
274225
assert_eq!(expected, document);
275226
}
276227

cpp-linter/src/clang_tools/clang_tidy.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -276,16 +276,17 @@ pub fn run_clang_tidy(
276276
cmd.args(["--line-filter", filter.as_str()]);
277277
}
278278
}
279-
let mut original_content = None;
280-
if clang_params.tidy_review {
279+
let original_content = if !clang_params.tidy_review {
280+
None
281+
} else {
281282
cmd.arg("--fix-errors");
282-
original_content = Some(fs::read_to_string(&file.name).with_context(|| {
283+
Some(fs::read_to_string(&file.name).with_context(|| {
283284
format!(
284285
"Failed to cache file's original content before applying clang-tidy changes: {}",
285286
file_name.clone()
286287
)
287-
})?);
288-
}
288+
})?)
289+
};
289290
if !clang_params.style.is_empty() {
290291
cmd.args(["--format-style", clang_params.style.as_str()]);
291292
}

0 commit comments

Comments
 (0)