Skip to content

Commit 9e11645

Browse files
committed
use quick_xml
- deserialize only what is needed from XML. `length` and `value` fields are not used and are now ignored when parsing XML - read file contents only once when translating byte offset to line and columns
1 parent 05f8b95 commit 9e11645

File tree

5 files changed

+59
-126
lines changed

5 files changed

+59
-126
lines changed

Cargo.lock

+11-39
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cpp-linter/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ lenient_semver = "0.4.2"
2525
log = { version = "0.4.25", features = ["std"] }
2626
openssl = { version = "0.10", features = ["vendored"], optional = true }
2727
openssl-probe = { version = "0.1", optional = true }
28+
quick-xml = {version = "0.37.2", features = ["serialize"]}
2829
regex = "1.11.1"
2930
reqwest = "0.12.12"
3031
semver = "1.0.25"
3132
serde = { version = "1.0.217", features = ["derive"] }
32-
serde-xml-rs = "0.6.0"
3333
serde_json = "1.0.137"
3434
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"]}
3535
tokio-macros = "2.4.0"

cpp-linter/src/clang_tools/clang_format.rs

+40-73
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@
22
//! output.
33
44
use std::{
5+
fs,
56
process::Command,
67
sync::{Arc, Mutex, MutexGuard},
78
};
89

910
use anyhow::{Context, Result};
1011
use log::Level;
11-
// non-std crates
1212
use serde::Deserialize;
13-
use serde_xml_rs::de::Deserializer;
1413

1514
// project-specific crates/modules
1615
use super::MakeSuggestions;
@@ -19,12 +18,10 @@ use crate::{
1918
common_fs::{get_line_cols_from_offset, FileObj},
2019
};
2120

22-
/// A Structure used to deserialize clang-format's XML output.
23-
#[derive(Debug, Deserialize, PartialEq, Clone)]
24-
#[serde(rename = "replacements")]
21+
#[derive(Debug, Clone, Deserialize)]
2522
pub struct FormatAdvice {
2623
/// A list of [`Replacement`]s that clang-tidy wants to make.
27-
#[serde(rename = "$value")]
24+
#[serde(rename(deserialize = "replacement"))]
2825
pub replacements: Vec<Replacement>,
2926

3027
pub patched: Option<Vec<u8>>,
@@ -41,18 +38,12 @@ impl MakeSuggestions for FormatAdvice {
4138
}
4239

4340
/// A single replacement that clang-format wants to make.
44-
#[derive(Debug, Deserialize, PartialEq)]
41+
#[derive(Debug, PartialEq, Default, Clone, Copy, Deserialize)]
4542
pub struct Replacement {
4643
/// The byte offset where the replacement will start.
44+
#[serde(rename = "@offset")]
4745
pub offset: usize,
4846

49-
/// The amount of bytes that will be removed.
50-
pub length: usize,
51-
52-
/// The bytes (UTF-8 encoded) that will be added at the [`Replacement::offset`] position.
53-
#[serde(rename = "$value")]
54-
pub value: Option<String>,
55-
5647
/// The line number described by the [`Replacement::offset`].
5748
///
5849
/// This value is not provided by the XML output, but we calculate it after
@@ -68,18 +59,6 @@ pub struct Replacement {
6859
pub cols: usize,
6960
}
7061

71-
impl Clone for Replacement {
72-
fn clone(&self) -> Self {
73-
Replacement {
74-
offset: self.offset,
75-
length: self.length,
76-
value: self.value.clone(),
77-
line: self.line,
78-
cols: self.cols,
79-
}
80-
}
81-
}
82-
8362
/// Get a string that summarizes the given `--style`
8463
pub fn summarize_style(style: &str) -> String {
8564
if ["google", "chromium", "microsoft", "mozilla", "webkit"].contains(&style) {
@@ -173,38 +152,37 @@ pub fn run_clang_format(
173152
if output.stdout.is_empty() {
174153
return Ok(logs);
175154
}
176-
let xml = String::from_utf8(output.stdout)
177-
.with_context(|| format!("stdout from clang-format was not UTF-8 encoded: {file_name}"))?
178-
.lines()
179-
.collect::<Vec<&str>>()
180-
.join("");
181-
let config = serde_xml_rs::ParserConfig::new()
182-
.trim_whitespace(false)
183-
.whitespace_to_characters(true)
184-
.ignore_root_level_whitespace(true);
185-
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
186-
let mut format_advice = FormatAdvice::deserialize(&mut Deserializer::new(event_reader))
187-
.unwrap_or(FormatAdvice {
188-
replacements: vec![],
189-
patched: None,
190-
});
155+
let xml = String::from_utf8(output.stdout).with_context(|| {
156+
format!("XML output from clang-format was not UTF-8 encoded: {file_name}")
157+
})?;
158+
let mut format_advice = quick_xml::de::from_str::<FormatAdvice>(&xml).unwrap_or(FormatAdvice {
159+
replacements: vec![],
160+
patched: None,
161+
});
191162
format_advice.patched = patched;
192163
if !format_advice.replacements.is_empty() {
164+
let original_contents = fs::read(&file.name).with_context(|| {
165+
format!(
166+
"Failed to cache file's original content before applying clang-tidy changes: {}",
167+
file_name.clone()
168+
)
169+
})?;
193170
// get line and column numbers from format_advice.offset
194171
let mut filtered_replacements = Vec::new();
195172
for replacement in &mut format_advice.replacements {
196-
let (line_number, columns) = get_line_cols_from_offset(&file.name, replacement.offset);
173+
let (line_number, columns) =
174+
get_line_cols_from_offset(&original_contents, replacement.offset);
197175
replacement.line = line_number;
198176
replacement.cols = columns;
199177
for range in &ranges {
200178
if range.contains(&line_number.try_into().unwrap_or(0)) {
201-
filtered_replacements.push(replacement.clone());
179+
filtered_replacements.push(*replacement);
202180
break;
203181
}
204182
}
205183
if ranges.is_empty() {
206184
// lines_changed_only is disabled
207-
filtered_replacements.push(replacement.clone());
185+
filtered_replacements.push(*replacement);
208186
}
209187
}
210188
format_advice.replacements = filtered_replacements;
@@ -216,7 +194,6 @@ pub fn run_clang_format(
216194
#[cfg(test)]
217195
mod tests {
218196
use super::{summarize_style, FormatAdvice, Replacement};
219-
use serde::Deserialize;
220197

221198
#[test]
222199
fn parse_xml() {
@@ -226,52 +203,42 @@ mod tests {
226203
<replacement offset='147' length='0'> </replacement>
227204
<replacement offset='161' length='0'></replacement>
228205
<replacement offset='165' length='19'>&#10;&#10;</replacement>
229-
</replacements>"#;
230-
//since whitespace is part of the elements' body, we need to remove the LFs first
231-
let xml = xml_raw.lines().collect::<Vec<&str>>().join("");
206+
</replacements>"#
207+
.as_bytes()
208+
.to_vec();
232209

233210
let expected = FormatAdvice {
234211
replacements: vec![
235212
Replacement {
236213
offset: 113,
237-
length: 5,
238-
value: Some(String::from("\n ")),
239-
line: 0,
240-
cols: 0,
214+
..Default::default()
241215
},
242216
Replacement {
243217
offset: 147,
244-
length: 0,
245-
value: Some(String::from(" ")),
246-
line: 0,
247-
cols: 0,
218+
..Default::default()
248219
},
249220
Replacement {
250221
offset: 161,
251-
length: 0,
252-
value: None,
253-
line: 0,
254-
cols: 0,
222+
..Default::default()
255223
},
256224
Replacement {
257225
offset: 165,
258-
length: 19,
259-
value: Some(String::from("\n\n")),
260-
line: 0,
261-
cols: 0,
226+
..Default::default()
262227
},
263228
],
264229
patched: None,
265230
};
266-
let config = serde_xml_rs::ParserConfig::new()
267-
.trim_whitespace(false)
268-
.whitespace_to_characters(true)
269-
.ignore_root_level_whitespace(true);
270-
let event_reader = serde_xml_rs::EventReader::new_with_config(xml.as_bytes(), config);
271-
let document =
272-
FormatAdvice::deserialize(&mut serde_xml_rs::de::Deserializer::new(event_reader))
273-
.unwrap();
274-
assert_eq!(expected, document);
231+
232+
let xml = String::from_utf8(xml_raw).unwrap();
233+
234+
let document = quick_xml::de::from_str::<FormatAdvice>(&xml).unwrap();
235+
assert_eq!(expected.replacements.len(), document.replacements.len());
236+
for i in 0..expected.replacements.len() {
237+
assert_eq!(
238+
expected.replacements[i].offset,
239+
document.replacements[i].offset
240+
);
241+
}
275242
}
276243

277244
fn formalize_style(style: &str, expected: &str) {

cpp-linter/src/common_fs/mod.rs

+7-11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
33
use std::fmt::Debug;
44
use std::fs;
5-
use std::io::Read;
65
use std::path::{Component, Path};
76
use std::{ops::RangeInclusive, path::PathBuf};
87

@@ -228,15 +227,11 @@ impl FileObj {
228227
/// boundary exists at the returned column number. However, the `offset` given to this
229228
/// function is expected to originate from diagnostic information provided by
230229
/// clang-format or clang-tidy.
231-
pub fn get_line_cols_from_offset(file_path: &PathBuf, offset: usize) -> (usize, usize) {
232-
let mut file_buf = vec![0; offset];
233-
fs::File::open(file_path)
234-
.unwrap()
235-
.read_exact(&mut file_buf)
236-
.unwrap();
237-
let lines = file_buf.split(|byte| byte == &b'\n');
230+
pub fn get_line_cols_from_offset(contents: &[u8], offset: usize) -> (usize, usize) {
231+
let lines = contents[0..offset].split(|byte| byte == &b'\n');
238232
let line_count = lines.clone().count();
239-
let column_count = lines.last().unwrap_or(&[]).len() + 1; // +1 because not a 0 based count
233+
// here we `cols.len() + 1` because columns is not a 0-based count
234+
let column_count = lines.last().map(|cols| cols.len() + 1).unwrap_or(1);
240235
(line_count, column_count)
241236
}
242237

@@ -272,8 +267,8 @@ pub fn normalize_path(path: &Path) -> PathBuf {
272267

273268
#[cfg(test)]
274269
mod test {
275-
use std::env::current_dir;
276270
use std::path::PathBuf;
271+
use std::{env::current_dir, fs};
277272

278273
use super::{get_line_cols_from_offset, normalize_path, FileObj};
279274
use crate::cli::LinesChangedOnly;
@@ -317,7 +312,8 @@ mod test {
317312

318313
#[test]
319314
fn translate_byte_offset() {
320-
let (lines, cols) = get_line_cols_from_offset(&PathBuf::from("tests/demo/demo.cpp"), 144);
315+
let contents = fs::read(PathBuf::from("tests/demo/demo.cpp")).unwrap();
316+
let (lines, cols) = get_line_cols_from_offset(&contents, 144);
321317
println!("lines: {lines}, cols: {cols}");
322318
assert_eq!(lines, 13);
323319
assert_eq!(cols, 5);

cpp-linter/src/rest_api/github/mod.rs

-2
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,6 @@ mod test {
316316
});
317317
let replacements = vec![Replacement {
318318
offset: 0,
319-
length: 0,
320-
value: Some(String::new()),
321319
line: 1,
322320
cols: 1,
323321
}];

0 commit comments

Comments
 (0)