Skip to content

Commit 77bfaff

Browse files
committed
add tool to "enforce" semantic line breaks
See #1132
1 parent e68dfb8 commit 77bfaff

File tree

4 files changed

+331
-0
lines changed

4 files changed

+331
-0
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ book
44
book.toml
55

66
ci/date-check/target/
7+
ci/semantic-line-breaks/target/
78

89
# Generated by check-in.sh
910
pulls.json

ci/semantic-line-breaks/Cargo.lock

+193
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ci/semantic-line-breaks/Cargo.toml

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "semantic-line-breaks"
3+
version = "0.0.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
anyhow = "1"
8+
ignore = "0.4"
9+
10+
[dependencies.regex]
11+
version = "1"
12+
# `pattern` lets a `Regex` be passed where `str` methods expect a pattern
# (src/main.rs hands one to `str::split_inclusive`); the regex crate documents
# this feature as unstable, so it needs a nightly toolchain.
features = ["pattern"]

ci/semantic-line-breaks/src/main.rs

+125
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
use std::{env, fs, process};
2+
3+
use anyhow::Result;
4+
use ignore::Walk;
5+
use regex::Regex;
6+
7+
fn main() -> Result<()> {
8+
let mut args = env::args();
9+
if args.len() == 1 {
10+
eprintln!("error: expected root Markdown directory as CLI argument");
11+
process::exit(1);
12+
}
13+
let root_dir = args.nth(1).unwrap();
14+
for result in Walk::new(root_dir) {
15+
let entry = result?;
16+
if entry.file_type().expect("no stdin").is_dir() {
17+
continue;
18+
}
19+
let path = entry.path();
20+
if let Some(extension) = path.extension() {
21+
if extension != "md" {
22+
continue;
23+
}
24+
} else {
25+
continue;
26+
}
27+
let old = fs::read_to_string(path)?;
28+
let new = comply(&old)?;
29+
if new != old {
30+
fs::write(path, new)?;
31+
}
32+
}
33+
Ok(())
34+
}
35+
36+
/// Reformats Markdown `content` so that each sentence starts on its own line
/// ("semantic line breaks") and returns the new text.
///
/// A line is broken after every `.`, `?`, `;` or `!` that is followed by
/// whitespace; each resulting piece keeps the leading whitespace of the
/// original line. The following lines are copied through unchanged:
///
/// * fenced code blocks (``` fences, indented or not) and the fence lines,
/// * lines matching the list/marker heuristic (`1. `, `- `, `* `, `r? `),
/// * headings (lines starting with `#`),
/// * blank lines,
/// * lines containing `e.g.` or `i.e.`, where a period is not a sentence end,
/// * table rows (lines containing ` | `).
///
/// Line endings are normalised to `\n` and non-empty output always ends with a
/// newline. Empty input yields an empty string.
///
/// # Errors
///
/// Returns an error if either regular expression fails to compile.
fn comply(content: &str) -> Result<String> {
    // Nothing to reflow; returning the input untouched keeps the caller from
    // rewriting empty files with a lone newline.
    if content.is_empty() {
        return Ok(String::new());
    }

    let split_re = Regex::new(r"(\.|\?|;|!)\s+")?;
    let ignore_re = Regex::new(r"(\d\.|\-|\*|r\?)\s+")?;

    let mut new_content: Vec<String> = Vec::new();
    let mut in_code_block = false;

    for raw_line in content.lines() {
        // Fences are handled before any other filter so that no other rule can
        // swallow a fence line and leave the code-block state out of sync.
        // Leading whitespace is ignored because fences nested in list items
        // are indented.
        if raw_line.trim_start().starts_with("```") {
            in_code_block = !in_code_block;
            new_content.push(raw_line.to_owned());
            continue;
        }
        if in_code_block {
            new_content.push(raw_line.to_owned());
            continue;
        }

        let line = raw_line.trim_end();
        let keep_as_is = ignore_re.is_match(raw_line) // list items and `r? @user`
            || raw_line.starts_with('#') // headings
            || line.is_empty()
            || line.contains("e.g.") // period is not an end of sentence
            || line.contains("i.e.") // period is not an end of sentence
            || line.contains(" | ") // tables
            || !split_re.is_match(line);
        if keep_as_is {
            new_content.push(raw_line.to_owned());
            continue;
        }

        // Reuse the line's own leading whitespace for every piece so that tab
        // indentation is preserved instead of being turned into spaces.
        let prefix = &line[..line.len() - line.trim_start().len()];
        new_content.extend(
            line.split_inclusive(&split_re)
                .map(|portion| format!("{prefix}{}", portion.trim())),
        );
    }

    let mut output = new_content.join("\n");
    output.push('\n');
    Ok(output)
}
94+
95+
/// Runs `comply` over a small document covering every rule: a heading, a
/// blank line, a multi-sentence line that must be split, a numbered list item,
/// a table row, a line with `e.g.`/`i.e.`, a fenced code block, and a
/// single-sentence line.
#[test]
fn test() {
    // The trailing empty element makes `join` end the text with a newline.
    let input = [
        "# some heading",
        "",
        "must! be; split? now.",
        "1. ignore numbered",
        "ignore | tables",
        "ignore e.g. and i.e. for realsies",
        "```",
        "some code. block",
        "```",
        "some more text.",
        "",
    ]
    .join("\n");
    let expected = [
        "# some heading",
        "",
        "must!",
        "be;",
        "split?",
        "now.",
        "1. ignore numbered",
        "ignore | tables",
        "ignore e.g. and i.e. for realsies",
        "```",
        "some code. block",
        "```",
        "some more text.",
        "",
    ]
    .join("\n");
    assert_eq!(comply(&input).unwrap(), expected);
}

0 commit comments

Comments
 (0)