From 1c4e49a8940d3f55d9468600636f810c21b61282 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 4 Aug 2021 13:53:40 +0200 Subject: [PATCH 1/3] Move HTML ID list creation into a macro --- src/librustdoc/html/markdown.rs | 101 ++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 47772651bf9b9..9360d1ef73229 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -1436,50 +1436,63 @@ pub struct IdMap { } fn init_id_map() -> FxHashMap { - let mut map = FxHashMap::default(); - // This is the list of IDs used in Javascript. - map.insert("help".to_owned(), 1); - // This is the list of IDs used in HTML generated in Rust (including the ones - // used in tera template files). - map.insert("mainThemeStyle".to_owned(), 1); - map.insert("themeStyle".to_owned(), 1); - map.insert("theme-picker".to_owned(), 1); - map.insert("theme-choices".to_owned(), 1); - map.insert("settings-menu".to_owned(), 1); - map.insert("help-button".to_owned(), 1); - map.insert("main".to_owned(), 1); - map.insert("search".to_owned(), 1); - map.insert("crate-search".to_owned(), 1); - map.insert("render-detail".to_owned(), 1); - map.insert("toggle-all-docs".to_owned(), 1); - map.insert("all-types".to_owned(), 1); - map.insert("default-settings".to_owned(), 1); - map.insert("rustdoc-vars".to_owned(), 1); - map.insert("sidebar-vars".to_owned(), 1); - map.insert("copy-path".to_owned(), 1); - map.insert("TOC".to_owned(), 1); - // This is the list of IDs used by rustdoc sections (but still generated by - // rustdoc). - map.insert("fields".to_owned(), 1); - map.insert("variants".to_owned(), 1); - map.insert("implementors-list".to_owned(), 1); - map.insert("synthetic-implementors-list".to_owned(), 1); - map.insert("foreign-impls".to_owned(), 1); - map.insert("implementations".to_owned(), 1); - map.insert("trait-implementations".to_owned(), 1); - map.insert("synthetic-implementations".to_owned(), 1); - map.insert("blanket-implementations".to_owned(), 1); - map.insert("associated-types".to_owned(), 1); - map.insert("associated-const".to_owned(), 1); - map.insert("required-methods".to_owned(), 1); - map.insert("provided-methods".to_owned(), 1); - map.insert("implementors".to_owned(), 1); - map.insert("synthetic-implementors".to_owned(), 1); - map.insert("trait-implementations-list".to_owned(), 1); - map.insert("synthetic-implementations-list".to_owned(), 1); - map.insert("blanket-implementations-list".to_owned(), 1); - map.insert("deref-methods".to_owned(), 1); - map + // We declare the ID map this way to make it simpler for the HTML IDs tidy check to extract + // the IDs. + macro_rules! html_id_map { + ($($id:literal,)+) => {{ + let mut map = FxHashMap::default(); + + $( + map.insert($id.to_owned(), 1); + )+ + map + }} + } + + html_id_map!( + // This is the list of IDs used in Javascript. + "help", + // This is the list of IDs used in HTML generated in Rust (including the ones + // used in tera template files). + "mainThemeStyle", + "themeStyle", + "theme-picker", + "theme-choices", + "settings-menu", + "help-button", + "main", + "search", + "crate-search", + "render-detail", + "toggle-all-docs", + "all-types", + "default-settings", + "rustdoc-vars", + "sidebar-vars", + "copy-path", + "TOC", + // This is the list of IDs used by rustdoc sections (but still generated by + // rustdoc). + "fields", + "variants", + "implementors-list", + "synthetic-implementors-list", + "foreign-impls", + "implementations", + "trait-implementations", + "synthetic-implementations", + "blanket-implementations", + "associated-types", + "associated-const", + "required-methods", + "provided-methods", + "implementors", + "synthetic-implementors", + "trait-implementations-list", + "synthetic-implementations-list", + "blanket-implementations-list", + "deref-methods", + ) } impl IdMap { From af660363b3fc5a6b9878511980966d775530a65a Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 9 Jun 2021 22:35:21 +0200 Subject: [PATCH 2/3] Add new tidy check to ensure that rustdoc DOM IDs are all declared as expected --- src/tools/tidy/src/lib.rs | 1 + src/tools/tidy/src/main.rs | 3 + src/tools/tidy/src/rustdoc_html_ids.rs | 166 +++++++++++++++++++++++++ 3 files changed, 170 insertions(+) create mode 100644 src/tools/tidy/src/rustdoc_html_ids.rs diff --git a/src/tools/tidy/src/lib.rs b/src/tools/tidy/src/lib.rs index a20ea3235ed46..84480e8558e31 100644 --- a/src/tools/tidy/src/lib.rs +++ b/src/tools/tidy/src/lib.rs @@ -47,6 +47,7 @@ pub mod extdeps; pub mod features; pub mod pal; pub mod primitive_docs; +pub mod rustdoc_html_ids; pub mod style; pub mod target_specific_tests; pub mod ui_tests; diff --git a/src/tools/tidy/src/main.rs b/src/tools/tidy/src/main.rs index d555f7c8e34ff..df7879606e873 100644 --- a/src/tools/tidy/src/main.rs +++ b/src/tools/tidy/src/main.rs @@ -69,6 +69,9 @@ fn main() { check!(errors, &compiler_path); check!(error_codes_check, &[&src_path, &compiler_path]); + // Checks for rustdoc. + check!(rustdoc_html_ids, &src_path); + // Checks that only make sense for the std libs. check!(pal, &library_path); check!(primitive_docs, &library_path); diff --git a/src/tools/tidy/src/rustdoc_html_ids.rs b/src/tools/tidy/src/rustdoc_html_ids.rs new file mode 100644 index 0000000000000..3b8f3359f9452 --- /dev/null +++ b/src/tools/tidy/src/rustdoc_html_ids.rs @@ -0,0 +1,166 @@ +//! Checks that the rustdoc ID map is up-to-date. The goal here is to check a few things: +//! +//! * All IDs created by rustdoc (through JS or files generation) are declared in the ID map. +//! * There are no unused IDs. + +use std::collections::HashMap; +use std::ffi::OsStr; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +use regex::Regex; + +const ID_MAP_PATH: &str = "librustdoc/html/markdown.rs"; +const IDS_USED_IN_JS: &[&str] = &[ + // This one is created in the JS and therefore cannot be found in rust files. + "help", + // This one is used when we need to use a "default" ID. + "deref-methods", +]; + +fn extract_ids(path: &Path, bad: &mut bool) -> HashMap { + let file = File::open(path).expect("failed to open file to extract rustdoc IDs"); + let buf_reader = BufReader::new(file); + let mut iter = buf_reader.lines(); + let mut ids = HashMap::new(); + + while let Some(Ok(line)) = iter.next() { + if line.trim_start().starts_with("html_id_map!(") { + break; + } + } + // We're now in the function body, time to retrieve the IDs! + while let Some(Ok(line)) = iter.next() { + let line = line.trim_start(); + if line.starts_with("// ") { + // It's a comment, ignoring this line... + continue; + } else if line.starts_with(")") { + // We reached the end of the IDs declaration list. + break; + } + let id = line.split('"').skip(1).next().unwrap(); + if ids.insert(id.to_owned(), 0).is_some() { + eprintln!( + "=> ID `{}` is defined more than once in the ID map in file `{}`", + id, ID_MAP_PATH + ); + *bad = true; + } + } + if ids.is_empty() { + eprintln!("=> No IDs were found in rustdoc in file `{}`...", ID_MAP_PATH); + *bad = true; + } + ids +} + +fn check_id( + path: &Path, + id: &str, + ids: &mut HashMap, + line_nb: usize, + bad: &mut bool, +) { + if id.contains('{') { + // This is a formatted ID, no need to check it! + return; + } + let id = id.to_owned(); + match ids.get_mut(&id) { + Some(nb) => *nb += 1, + None => { + eprintln!( + "=> ID `{}` in file `{}` at line {} is missing from `init_id_map`", + id, + path.display(), + line_nb + 1, + ); + *bad = true; + } + } +} + +fn check_ids( + path: &Path, + f: &str, + ids: &mut HashMap, + regex: &Regex, + bad: &mut bool, + small_section_header_checked: &mut usize, +) { + let mut is_checking_small_section_header = None; + + for (line_nb, line) in f.lines().enumerate() { + let trimmed = line.trim_start(); + // We're not interested in comments or doc comments. + if trimmed.starts_with("//") { + continue; + } else if let Some(start_line) = is_checking_small_section_header { + if line_nb == start_line + 2 { + check_id(path, trimmed.split('"').skip(1).next().unwrap(), ids, line_nb, bad); + is_checking_small_section_header = None; + } + } else if trimmed.starts_with("write_small_section_header(") { + // This function is used to create section: the second argument of the function is an + // ID and we need to check it as well, hence this specific check... + if trimmed.contains(',') { + // This is a call made on one line, so we can simply check it! + check_id(path, trimmed.split('"').skip(1).next().unwrap(), ids, line_nb, bad); + } else { + is_checking_small_section_header = Some(line_nb); + } + *small_section_header_checked += 1; + continue; + } + for cap in regex.captures_iter(line) { + check_id(path, &cap[1], ids, line_nb, bad); + } + } +} + +pub fn check(path: &Path, bad: &mut bool) { + // matches ` id="blabla"` + let regex = Regex::new(r#"[\s"]id=\\?["']([^\s\\]+)\\?["'][\s\\>"{]"#).unwrap(); + + println!("Checking rustdoc IDs..."); + let mut ids = extract_ids(&path.join(ID_MAP_PATH), bad); + let mut small_section_header_checked = 0; + if *bad { + return; + } + super::walk( + &path.join("librustdoc/html"), + &mut |path| super::filter_dirs(path), + &mut |entry, contents| { + let path = entry.path(); + let file_name = entry.file_name(); + if path.extension() == Some(OsStr::new("html")) + || (path.extension() == Some(OsStr::new("rs")) && file_name != "tests.rs") + { + check_ids(path, contents, &mut ids, ®ex, bad, &mut small_section_header_checked); + } + }, + ); + if small_section_header_checked == 0 { + eprintln!( + "No call to the `write_small_section_header` function was found. Was it renamed?", + ); + *bad = true; + } + for (id, nb) in ids { + if IDS_USED_IN_JS.iter().any(|i| i == &id) { + if nb != 0 { + eprintln!("=> ID `{}` is not supposed to be used in Rust code but in the JS!", id); + *bad = true; + } + } else if nb == 0 { + eprintln!( + "=> ID `{}` is unused, it should be removed from `init_id_map` in file `{}`", + id, ID_MAP_PATH + ); + *bad = true; + } + } +} From 802a2934493bb42df797821d7d774c2cac285f0b Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Wed, 15 Sep 2021 14:31:20 +0200 Subject: [PATCH 3/3] Improve code and add documentation --- src/librustdoc/html/markdown.rs | 2 ++ src/librustdoc/html/render/print_item.rs | 2 ++ src/tools/tidy/src/rustdoc_html_ids.rs | 21 ++++++++++++++------- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 9360d1ef73229..395500d15404e 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -1449,6 +1449,8 @@ fn init_id_map() -> FxHashMap { }} } + // IMPORTANT: Do NOT change the formatting or name of this macro + // without updating the tidy check. html_id_map!( // This is the list of IDs used in Javascript. "help", diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs index d07ef6db4c6b0..041d3ad9679c5 100644 --- a/src/librustdoc/html/render/print_item.rs +++ b/src/librustdoc/html/render/print_item.rs @@ -628,6 +628,8 @@ fn item_trait(w: &mut Buffer, cx: &Context<'_>, it: &clean::Item, t: &clean::Tra // Trait documentation document(w, cx, it, None, HeadingOffset::H2); + // This function is checked in tidy for rustdoc IDs. If you rename/update it, don't forget + // to update the `src/tools/tidy/rustdoc_html_ids.rs` file. fn write_small_section_header(w: &mut Buffer, id: &str, title: &str, extra_content: &str) { write!( w, diff --git a/src/tools/tidy/src/rustdoc_html_ids.rs b/src/tools/tidy/src/rustdoc_html_ids.rs index 3b8f3359f9452..6428a56372ecd 100644 --- a/src/tools/tidy/src/rustdoc_html_ids.rs +++ b/src/tools/tidy/src/rustdoc_html_ids.rs @@ -1,6 +1,7 @@ //! Checks that the rustdoc ID map is up-to-date. The goal here is to check a few things: //! -//! * All IDs created by rustdoc (through JS or files generation) are declared in the ID map. +//! * All IDs created by rustdoc (through JS or `.html` files generation) are declared in the +//! ID map. //! * There are no unused IDs. use std::collections::HashMap; @@ -31,7 +32,8 @@ fn extract_ids(path: &Path, bad: &mut bool) -> HashMap { } } // We're now in the function body, time to retrieve the IDs! - while let Some(Ok(line)) = iter.next() { + while let Some(line) = iter.next() { + let line = line.unwrap(); let line = line.trim_start(); if line.starts_with("// ") { // It's a comment, ignoring this line... @@ -44,13 +46,14 @@ fn extract_ids(path: &Path, bad: &mut bool) -> HashMap { if ids.insert(id.to_owned(), 0).is_some() { eprintln!( "=> ID `{}` is defined more than once in the ID map in file `{}`", - id, ID_MAP_PATH + id, + path.display(), ); *bad = true; } } if ids.is_empty() { - eprintln!("=> No IDs were found in rustdoc in file `{}`...", ID_MAP_PATH); + eprintln!("=> No IDs were found in rustdoc in file `{}`...", path.display()); *bad = true; } ids @@ -102,7 +105,11 @@ fn check_ids( check_id(path, trimmed.split('"').skip(1).next().unwrap(), ids, line_nb, bad); is_checking_small_section_header = None; } - } else if trimmed.starts_with("write_small_section_header(") { + } else if trimmed.contains("write_small_section_header(") + && !trimmed.contains("fn write_small_section_header(") + { + // First we extract the arguments. + let trimmed = trimmed.split("write_small_section_header(").skip(1).next().unwrap_or(""); // This function is used to create section: the second argument of the function is an // ID and we need to check it as well, hence this specific check... if trimmed.contains(',') { @@ -145,12 +152,12 @@ pub fn check(path: &Path, bad: &mut bool) { ); if small_section_header_checked == 0 { eprintln!( - "No call to the `write_small_section_header` function was found. Was it renamed?", + "=> No call to the `write_small_section_header` function was found. Was it renamed?", ); *bad = true; } for (id, nb) in ids { - if IDS_USED_IN_JS.iter().any(|i| i == &id) { + if IDS_USED_IN_JS.contains(&id.as_str()) { if nb != 0 { eprintln!("=> ID `{}` is not supposed to be used in Rust code but in the JS!", id); *bad = true;