Skip to content

Commit ff4758c

Browse files
committed
Replace regex-based parser for URL lines with open-coded one.
1 parent 5817351 commit ff4758c

File tree

3 files changed

+46
-25
lines changed

3 files changed

+46
-25
lines changed

src/tools/tidy/Cargo.toml

-2
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,3 @@ version = "0.1.0"
44
authors = ["Alex Crichton <[email protected]>"]
55

66
[dependencies]
7-
regex = "*"
8-
lazy_static = "*"

src/tools/tidy/src/main.rs

-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
//! etc. This is run by default on `make check` and as part of the auto
1515
//! builders.
1616
17-
extern crate regex;
18-
#[macro_use] extern crate lazy_static;
19-
2017
use std::fs;
2118
use std::path::{PathBuf, Path};
2219
use std::env;

src/tools/tidy/src/style.rs

+46-20
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ use std::fs::File;
2626
use std::io::prelude::*;
2727
use std::path::Path;
2828

29-
use regex::Regex;
30-
3129
const COLS: usize = 100;
3230
const LICENSE: &'static str = "\
3331
Copyright <year> The Rust Project Developers. See the COPYRIGHT
@@ -40,26 +38,54 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
4038
option. This file may not be copied, modified, or distributed
4139
except according to those terms.";
4240

43-
/// True if LINE is allowed to be longer than the normal limit.
44-
///
45-
/// Currently there is only one exception: if the line is within a
46-
/// comment, and its entire text is one URL (possibly with a Markdown
47-
/// link label in front), then it's allowed to be overlength. This is
48-
/// because Markdown offers no way to split a line in the middle of a
49-
/// URL, and the length of URLs for external references is beyond our
50-
/// control.
51-
fn long_line_is_ok(line: &str) -> bool {
52-
lazy_static! {
53-
static ref URL_RE: Regex = Regex::new(
54-
// This regexp uses the CommonMark definition of link
55-
// label. It thinks any sequence of nonwhitespace
56-
// characters beginning with "http://" or "https://" is a
57-
// URL. Add more schemas as necessary.
58-
r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
59-
).unwrap();
41+
/// Parser states for `line_is_url`.
///
/// Each variant names what the parser expects to see next.
#[derive(Clone, Copy, Debug, PartialEq)]
enum LIUState {
    /// Nothing consumed yet; the next token must open a line comment.
    ExpCommentStart,
    /// Comment opener seen; the next token is a link label or the URL.
    ExpLinkLabelOrUrl,
    /// Link label seen; the next token must be the URL.
    ExpUrl,
    /// URL seen; any further token disqualifies the line.
    ExpEnd,
}

/// True if LINE appears to be a line comment containing a URL,
/// possibly with a Markdown link label in front, and nothing else.
/// The Markdown link label, if present, may not contain whitespace.
/// Lines of this form are allowed to be overlength, because Markdown
/// offers no way to split a line in the middle of a URL, and the lengths
/// of URLs to external references are beyond our control.
///
/// A token counts as a URL only if it starts with "http://" or
/// "https://" and has at least one more character after the scheme;
/// a bare scheme is not a URL.  Add more schemes as necessary.
fn line_is_url(line: &str) -> bool {
    /// True if TOK is SCHEME followed by at least one more character.
    fn has_scheme(tok: &str, scheme: &str) -> bool {
        tok.len() > scheme.len() && tok.starts_with(scheme)
    }

    /// True if TOK looks like an http or https URL.
    fn is_url(tok: &str) -> bool {
        has_scheme(tok, "http://") || has_scheme(tok, "https://")
    }

    /// True if TOK looks like a non-empty Markdown link label, "[x]:".
    fn is_link_label(tok: &str) -> bool {
        tok.len() >= 4 && tok.starts_with("[") && tok.ends_with("]:")
    }

    let mut state = LIUState::ExpCommentStart;

    // Tokens are whitespace-separated, so the comment opener must be
    // followed by whitespace: "//http://..." is one token and is rejected.
    for tok in line.split_whitespace() {
        state = match (state, tok) {
            (LIUState::ExpCommentStart, "//") |
            (LIUState::ExpCommentStart, "///") |
            (LIUState::ExpCommentStart, "//!") => LIUState::ExpLinkLabelOrUrl,

            (LIUState::ExpLinkLabelOrUrl, w) if is_link_label(w) => LIUState::ExpUrl,

            (LIUState::ExpLinkLabelOrUrl, w) if is_url(w) => LIUState::ExpEnd,
            (LIUState::ExpUrl, w) if is_url(w) => LIUState::ExpEnd,

            // Anything else, including any token after the URL.
            _ => return false,
        };
    }

    // Only a line that ended right after the URL qualifies.
    state == LIUState::ExpEnd
}
83+
84+
/// True if LINE is allowed to be longer than the normal limit.
85+
/// Currently there is only one exception, for long URLs, but more
86+
/// may be added in the future.
87+
fn long_line_is_ok(line: &str) -> bool {
88+
if line_is_url(line) {
6389
return true;
6490
}
6591

0 commit comments

Comments
 (0)