diff --git a/build.rs b/build.rs index 87fdfd2..3a4ea4a 100644 --- a/build.rs +++ b/build.rs @@ -1,320 +1,370 @@ -use self::shared::ModifierSet; -use std::fmt::Write; -use std::iter; -use std::iter::Peekable; use std::path::Path; -type StrResult = Result; - #[path = "src/shared.rs"] mod shared; -/// A module of definitions. -struct Module<'a>(Vec<(&'a str, Binding<'a>)>); +fn main() { + println!("cargo::rerun-if-changed=build.rs"); + + let out = std::env::var_os("OUT_DIR").unwrap(); + let out_path = Path::new(&out); + + module_files::generate_modules(out_path); -impl<'a> Module<'a> { - fn new(mut list: Vec<(&'a str, Binding<'a>)>) -> Self { - list.sort_by_key(|&(name, _)| name); - Self(list) + #[cfg(feature = "_test-unicode-conformance")] + { + const UNICODE_VERSION: &str = "16.0.0"; + unicode_data::download_file( + UNICODE_VERSION, + "emoji/emoji-variation-sequences.txt", + out_path.join("emoji-variation-sequences.txt"), + ); + unicode_data::download_file( + UNICODE_VERSION, + "StandardizedVariants.txt", + out_path.join("StandardizedVariants.txt"), + ) } } -/// A definition bound in a module, with metadata. -struct Binding<'a> { - def: Def<'a>, - deprecation: Option<&'a str>, -} +mod module_files { + use super::shared::ModifierSet; + use std::fmt::Write; + use std::iter; + use std::iter::Peekable; + use std::path::Path; -/// A definition in a module. -enum Def<'a> { - Symbol(Symbol<'a>), - Module(Module<'a>), -} + type StrResult = Result; -/// A symbol, either a leaf or with modifiers with optional deprecation. -enum Symbol<'a> { - Single(String), - Multi(Vec<(ModifierSet<&'a str>, String, Option<&'a str>)>), -} + /// A module of definitions. + struct Module<'a>(Vec<(&'a str, Binding<'a>)>); -/// A single line during parsing. -#[derive(Debug, Clone)] -enum Line<'a> { - Blank, - Deprecated(&'a str), - ModuleStart(&'a str), - ModuleEnd, - Symbol(&'a str, Option), - Variant(ModifierSet<&'a str>, String), - Eof, -} + impl<'a> Module<'a> { + fn new(mut list: Vec<(&'a str, Binding<'a>)>) -> Self { + list.sort_by_key(|&(name, _)| name); + Self(list) + } + } -#[derive(Debug, Clone)] -enum Declaration<'a> { - ModuleStart(&'a str, Option<&'a str>), - ModuleEnd, - Symbol(&'a str, Option, Option<&'a str>), - Variant(ModifierSet<&'a str>, String, Option<&'a str>), -} + /// A definition bound in a module, with metadata. + struct Binding<'a> { + def: Def<'a>, + deprecation: Option<&'a str>, + } -fn main() { - println!("cargo::rerun-if-changed=build.rs"); + /// A definition in a module. + enum Def<'a> { + Symbol(Symbol<'a>), + Module(Module<'a>), + } - let mut buf = String::new(); - process(&mut buf, Path::new("src/modules/sym.txt"), "SYM", "Named general symbols."); - process(&mut buf, Path::new("src/modules/emoji.txt"), "EMOJI", "Named emoji."); + /// A symbol, either a leaf or with modifiers with optional deprecation. + enum Symbol<'a> { + Single(String), + Multi(Vec<(ModifierSet<&'a str>, String, Option<&'a str>)>), + } - let out = std::env::var_os("OUT_DIR").unwrap(); - let dest = Path::new(&out).join("out.rs"); - std::fs::write(&dest, buf).unwrap(); + /// A single line during parsing. + #[derive(Debug, Clone)] + enum Line<'a> { + Blank, + Deprecated(&'a str), + ModuleStart(&'a str), + ModuleEnd, + Symbol(&'a str, Option), + Variant(ModifierSet<&'a str>, String), + Eof, + } - #[cfg(feature = "_test-unicode-conformance")] - { - let emoji_vs_list = Path::new(&out).join("emoji-variation-sequences.txt"); - if !std::fs::read_to_string(&emoji_vs_list) - .is_ok_and(|text| text.contains("Emoji Version 16.0")) - { - let content = ureq::get( - "https://www.unicode.org/Public/16.0.0/ucd/emoji/emoji-variation-sequences.txt", - ) - .call() - .unwrap() - .body_mut() - .read_to_string() - .unwrap(); - std::fs::write(emoji_vs_list, content).unwrap(); - } + #[derive(Debug, Clone)] + enum Declaration<'a> { + ModuleStart(&'a str, Option<&'a str>), + ModuleEnd, + Symbol(&'a str, Option, Option<&'a str>), + Variant(ModifierSet<&'a str>, String, Option<&'a str>), } -} -/// Processes a single file and turns it into a global module. -fn process(buf: &mut String, file: &Path, name: &str, desc: &str) { - println!("cargo::rerun-if-changed={}", file.display()); - - let text = std::fs::read_to_string(file).unwrap(); - let mut line_nr = 0; - let mut deprecation = None; - let mut iter = text - .lines() - .inspect(|_| line_nr += 1) - .map(tokenize) - .chain(iter::once(Ok(Line::Eof))) - .filter_map(|line| match line { - Err(message) => Some(Err(message)), - Ok(Line::Blank) => None, - Ok(Line::Deprecated(message)) => { - if deprecation.is_some() { - Some(Err(String::from("duplicate `@deprecated:`"))) - } else { - deprecation = Some(message); - None + /// Generate Codex modules from files. + pub(crate) fn generate_modules(out: &Path) { + let mut buf = String::new(); + + process( + &mut buf, + Path::new("src/modules/sym.txt"), + "SYM", + "Named general symbols.", + ); + + process(&mut buf, Path::new("src/modules/emoji.txt"), "EMOJI", "Named emoji."); + + std::fs::write(out.join("out.rs"), buf).unwrap(); + } + + /// Processes a single file and turns it into a global module. + fn process(buf: &mut String, file: &Path, name: &str, desc: &str) { + println!("cargo::rerun-if-changed={}", file.display()); + + let text = std::fs::read_to_string(file).unwrap(); + let mut line_nr = 0; + let mut deprecation = None; + let mut iter = text + .lines() + .inspect(|_| line_nr += 1) + .map(tokenize) + .chain(iter::once(Ok(Line::Eof))) + .filter_map(|line| match line { + Err(message) => Some(Err(message)), + Ok(Line::Blank) => None, + Ok(Line::Deprecated(message)) => { + if deprecation.is_some() { + Some(Err(String::from("duplicate `@deprecated:`"))) + } else { + deprecation = Some(message); + None + } } - } - Ok(Line::ModuleStart(name)) => { - Some(Ok(Declaration::ModuleStart(name, deprecation.take()))) - } - Ok(Line::ModuleEnd) => { - if deprecation.is_some() { - Some(Err(String::from("dangling `@deprecated:`"))) - } else { - Some(Ok(Declaration::ModuleEnd)) + Ok(Line::ModuleStart(name)) => { + Some(Ok(Declaration::ModuleStart(name, deprecation.take()))) } - } - Ok(Line::Symbol(name, value)) => { - Some(Ok(Declaration::Symbol(name, value, deprecation.take()))) - } - Ok(Line::Variant(modifiers, value)) => { - Some(Ok(Declaration::Variant(modifiers, value, deprecation.take()))) - } - Ok(Line::Eof) => { - deprecation.map(|_| Err(String::from("dangling `@deprecated:`"))) - } - }) - .peekable(); - - let module = match parse(&mut iter) { - Ok(defs) => Module::new(defs), - Err(e) => { - let message = format!("{}:{}: {e}", file.display(), line_nr); - println!("cargo::warning={message}"); - std::process::exit(1); - } - }; - - write!(buf, "#[doc = {desc:?}] pub const {name}: Module = ").unwrap(); - encode(buf, &module); - buf.push(';'); -} + Ok(Line::ModuleEnd) => { + if deprecation.is_some() { + Some(Err(String::from("dangling `@deprecated:`"))) + } else { + Some(Ok(Declaration::ModuleEnd)) + } + } + Ok(Line::Symbol(name, value)) => { + Some(Ok(Declaration::Symbol(name, value, deprecation.take()))) + } + Ok(Line::Variant(modifiers, value)) => { + Some(Ok(Declaration::Variant(modifiers, value, deprecation.take()))) + } + Ok(Line::Eof) => { + deprecation.map(|_| Err(String::from("dangling `@deprecated:`"))) + } + }) + .peekable(); -/// Tokenizes and classifies a line. -fn tokenize(line: &str) -> StrResult> { - // Strip comments. - let line = line.split_once("//").map_or(line, |(head, _)| head); + let module = match parse(&mut iter) { + Ok(defs) => Module::new(defs), + Err(e) => { + let message = format!("{}:{}: {e}", file.display(), line_nr); + println!("cargo::warning={message}"); + std::process::exit(1); + } + }; - // Ignore empty lines. - let line = line.trim(); - if line.is_empty() { - return Ok(Line::Blank); + write!(buf, "#[doc = {desc:?}] pub const {name}: Module = ").unwrap(); + encode(buf, &module); + buf.push(';'); } - let (head, tail) = match line.split_once(' ') { - Some((a, b)) => (a, Some(b)), - None => (line, None), - }; - - Ok(if head == "@deprecated:" { - Line::Deprecated(tail.ok_or("missing deprecation message")?.trim()) - } else if tail == Some("{") { - validate_ident(head)?; - Line::ModuleStart(head) - } else if head == "}" && tail.is_none() { - Line::ModuleEnd - } else if let Some(rest) = head.strip_prefix('.') { - for part in rest.split('.') { - validate_ident(part)?; + /// Tokenizes and classifies a line. + fn tokenize(line: &str) -> StrResult> { + // Strip comments. + let line = line.split_once("//").map_or(line, |(head, _)| head); + + // Ignore empty lines. + let line = line.trim(); + if line.is_empty() { + return Ok(Line::Blank); } - let value = decode_value(tail.ok_or("missing char")?)?; - Line::Variant(ModifierSet::from_raw_dotted(rest), value) - } else { - validate_ident(head)?; - let value = tail.map(decode_value).transpose()?; - Line::Symbol(head, value) - }) -} -/// Ensures that a string is a valid identifier. In `codex`, we use very strict -/// rules and allow only alphabetic ASCII chars. -fn validate_ident(string: &str) -> StrResult<()> { - if !string.is_empty() && string.chars().all(|c| c.is_ascii_alphabetic()) { - return Ok(()); - } - Err(format!("invalid identifier: {string:?}")) -} + let (head, tail) = match line.split_once(' ') { + Some((a, b)) => (a, Some(b)), + None => (line, None), + }; -/// Extracts the value of a variant, parsing `\u{XXXX}` and other escapes. -fn decode_value(mut text: &str) -> StrResult { - let mut result = String::new(); - loop { - if let Some(rest) = text.strip_prefix("\\u{") { - let Some((code, tail)) = rest.split_once('}') else { - return Err(format!( - "unclosed Unicode escape: \\u{{{}", - rest.escape_debug() - )); - }; - result.push( - u32::from_str_radix(code, 16) - .ok() - .and_then(|n| char::try_from(n).ok()) - .ok_or_else(|| format!("invalid Unicode escape \\u{{{code}}}"))?, - ); - text = tail; - } else if let Some(rest) = text.strip_prefix("\\vs{") { - let Some((value, tail)) = rest.split_once('}') else { - return Err(format!("unclosed VS escape: \\vs{{{}", rest.escape_debug())); - }; - let vs = match value { - "1" => '\u{fe00}', - "2" => '\u{fe01}', - "3" => '\u{fe02}', - "4" => '\u{fe03}', - "5" => '\u{fe04}', - "6" => '\u{fe05}', - "7" => '\u{fe06}', - "8" => '\u{fe07}', - "9" => '\u{fe08}', - "10" => '\u{fe09}', - "11" => '\u{fe0a}', - "12" => '\u{fe0b}', - "13" => '\u{fe0c}', - "14" => '\u{fe0d}', - "15" | "text" => '\u{fe0e}', - "16" | "emoji" => '\u{fe0f}', - code => return Err(format!("invalid VS escape: \\vs{{{code}}}")), - }; - result.push(vs); - text = tail; - } else if let Some((prefix, tail)) = text.find('\\').map(|i| text.split_at(i)) { - if prefix.is_empty() { - return Err(format!("invalid escape sequence: {tail}")); + Ok(if head == "@deprecated:" { + Line::Deprecated(tail.ok_or("missing deprecation message")?.trim()) + } else if tail == Some("{") { + validate_ident(head)?; + Line::ModuleStart(head) + } else if head == "}" && tail.is_none() { + Line::ModuleEnd + } else if let Some(rest) = head.strip_prefix('.') { + for part in rest.split('.') { + validate_ident(part)?; } - result.push_str(prefix); - text = tail; + let value = decode_value(tail.ok_or("missing char")?)?; + Line::Variant(ModifierSet::from_raw_dotted(rest), value) } else { - result.push_str(text); - return Ok(result); + validate_ident(head)?; + let value = tail.map(decode_value).transpose()?; + Line::Symbol(head, value) + }) + } + + /// Ensures that a string is a valid identifier. In `codex`, we use very strict + /// rules and allow only alphabetic ASCII chars. + fn validate_ident(string: &str) -> StrResult<()> { + if !string.is_empty() && string.chars().all(|c| c.is_ascii_alphabetic()) { + return Ok(()); } + Err(format!("invalid identifier: {string:?}")) } -} -/// Turns a stream of lines into a list of definitions. -fn parse<'a>( - p: &mut Peekable>>>, -) -> StrResult)>> { - let mut defs = vec![]; - loop { - match p.next().transpose()? { - None | Some(Declaration::ModuleEnd) => { - break; - } - Some(Declaration::Symbol(name, value, deprecation)) => { - let mut variants = vec![]; - while let Some(Declaration::Variant(name, value, deprecation)) = - p.peek().cloned().transpose()? - { - variants.push((name, value, deprecation)); - p.next(); + /// Extracts the value of a variant, parsing `\u{XXXX}` and other escapes. + fn decode_value(mut text: &str) -> StrResult { + let mut result = String::new(); + loop { + if let Some(rest) = text.strip_prefix("\\u{") { + let Some((code, tail)) = rest.split_once('}') else { + return Err(format!( + "unclosed Unicode escape: \\u{{{}", + rest.escape_debug() + )); + }; + result.push( + u32::from_str_radix(code, 16) + .ok() + .and_then(|n| char::try_from(n).ok()) + .ok_or_else(|| format!("invalid Unicode escape \\u{{{code}}}"))?, + ); + text = tail; + } else if let Some(rest) = text.strip_prefix("\\vs{") { + let Some((value, tail)) = rest.split_once('}') else { + return Err(format!( + "unclosed VS escape: \\vs{{{}", + rest.escape_debug(), + )); + }; + let vs = match value { + "1" => '\u{fe00}', + "2" => '\u{fe01}', + "3" => '\u{fe02}', + "4" => '\u{fe03}', + "5" => '\u{fe04}', + "6" => '\u{fe05}', + "7" => '\u{fe06}', + "8" => '\u{fe07}', + "9" => '\u{fe08}', + "10" => '\u{fe09}', + "11" => '\u{fe0a}', + "12" => '\u{fe0b}', + "13" => '\u{fe0c}', + "14" => '\u{fe0d}', + "15" | "text" => '\u{fe0e}', + "16" | "emoji" => '\u{fe0f}', + code => return Err(format!("invalid VS escape: \\vs{{{code}}}")), + }; + result.push(vs); + text = tail; + } else if let Some((prefix, tail)) = text.find('\\').map(|i| text.split_at(i)) + { + if prefix.is_empty() { + return Err(format!("invalid escape sequence: {tail}")); } + result.push_str(prefix); + text = tail; + } else { + result.push_str(text); + return Ok(result); + } + } + } - let symbol = if !variants.is_empty() { - if let Some(value) = value { - variants.insert(0, (ModifierSet::default(), value, None)); + /// Turns a stream of lines into a list of definitions. + fn parse<'a>( + p: &mut Peekable>>>, + ) -> StrResult)>> { + let mut defs = vec![]; + loop { + match p.next().transpose()? { + None | Some(Declaration::ModuleEnd) => { + break; + } + Some(Declaration::Symbol(name, value, deprecation)) => { + let mut variants = vec![]; + while let Some(Declaration::Variant(name, value, deprecation)) = + p.peek().cloned().transpose()? + { + variants.push((name, value, deprecation)); + p.next(); } - Symbol::Multi(variants) - } else { - let value = value.ok_or("symbol needs char or variants")?; - Symbol::Single(value) - }; - defs.push((name, Binding { def: Def::Symbol(symbol), deprecation })); - } - Some(Declaration::ModuleStart(name, deprecation)) => { - let module_defs = parse(p)?; - defs.push(( - name, - Binding { - def: Def::Module(Module::new(module_defs)), - deprecation, - }, - )); + let symbol = if !variants.is_empty() { + if let Some(value) = value { + variants.insert(0, (ModifierSet::default(), value, None)); + } + Symbol::Multi(variants) + } else { + let value = value.ok_or("symbol needs char or variants")?; + Symbol::Single(value) + }; + + defs.push((name, Binding { def: Def::Symbol(symbol), deprecation })); + } + Some(Declaration::ModuleStart(name, deprecation)) => { + let module_defs = parse(p)?; + defs.push(( + name, + Binding { + def: Def::Module(Module::new(module_defs)), + deprecation, + }, + )); + } + other => return Err(format!("expected definition, found {other:?}")), } - other => return Err(format!("expected definition, found {other:?}")), } + Ok(defs) } - Ok(defs) -} -/// Encodes a `Module` into Rust code. -fn encode(buf: &mut String, module: &Module) { - buf.push_str("Module(&["); - for (name, entry) in &module.0 { - write!(buf, "({name:?}, Binding {{ def: ").unwrap(); - match &entry.def { - Def::Module(module) => { - buf.push_str("Def::Module("); - encode(buf, module); - buf.push(')'); - } - Def::Symbol(symbol) => { - buf.push_str("Def::Symbol(Symbol::"); - match symbol { - Symbol::Single(value) => write!(buf, "Single({value:?})").unwrap(), - Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(), + /// Encodes a `Module` into Rust code. + fn encode(buf: &mut String, module: &Module) { + buf.push_str("Module(&["); + for (name, entry) in &module.0 { + write!(buf, "({name:?}, Binding {{ def: ").unwrap(); + match &entry.def { + Def::Module(module) => { + buf.push_str("Def::Module("); + encode(buf, module); + buf.push(')'); + } + Def::Symbol(symbol) => { + buf.push_str("Def::Symbol(Symbol::"); + match symbol { + Symbol::Single(value) => { + write!(buf, "Single({value:?})").unwrap() + } + Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(), + } + buf.push(')'); } - buf.push(')'); } + write!(buf, ", deprecation: {:?} }}),", entry.deprecation).unwrap(); + } + buf.push_str("])"); + } +} + +#[cfg(feature = "_test-unicode-conformance")] +mod unicode_data { + use std::path::Path; + + /// Downloads a file from Unicode. + pub fn download_file( + unicode_version: &str, + source: &str, + local_path: impl AsRef, + ) { + // This marker is added to the beginning of the downloaded file. If the + // file is already present, we can check whether the marker is right. If + // not, that means we need to re-download it. + let marker = format!("##CODEX# Unicode version: {unicode_version}"); + if !std::fs::read_to_string(&local_path) + .is_ok_and(|text| text.starts_with(&marker)) + { + let content = ureq::get(format!( + "https://www.unicode.org/Public/{unicode_version}/ucd/{source}" + )) + .call() + .unwrap() + .body_mut() + .read_to_string() + .unwrap(); + std::fs::write(local_path, marker + &content).unwrap(); } - write!(buf, ", deprecation: {:?} }}),", entry.deprecation).unwrap(); } - buf.push_str("])"); } diff --git a/src/lib.rs b/src/lib.rs index cd401ac..36d9971 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -191,18 +191,97 @@ mod test { } } + /// Returns the set of variation sequences defined in a file. + #[cfg(feature = "_test-unicode-conformance")] + fn read_sequences(source: &str) -> HashSet { + source + .lines() + .filter_map(|l| { + let line = l.split('#').next().unwrap_or(l); + (!line.is_empty()).then_some(line) + }) + .map(|line| { + line.split(';') + .next() + .unwrap() + .split_whitespace() + .map(|cp| { + char::from_u32(u32::from_str_radix(cp, 0x10).unwrap()).unwrap() + }) + .collect() + }) + .collect() + } + + /// Returns the set of standardized variation sequences defined by Unicode. + /// + /// This does not include emoji variation sequences (also known as + /// "presentation sequences"). + #[cfg(feature = "_test-unicode-conformance")] + fn get_valid_standardized_variation_sequences() -> HashSet { + read_sequences(include_str!(concat!( + env!("OUT_DIR"), + "/StandardizedVariants.txt", + ))) + } + + /// Tests whether a string is a standardized variation sequence. + /// + /// This does not include emoji variation sequences (i.e., presentation + /// sequences). Use [`is_presentation_sequence`] to test whether a string is + /// a presentation sequence. + fn is_standardized_variation_sequence(s: &str) -> bool { + // Non-specific variation selectors from + // https://unicode.org/charts/PDF/UFE00.pdf. + (0xFE00..=0xFE0D) + .map(|cp| char::from_u32(cp).unwrap()) + .any(|vs| s.contains(vs)) + } + + /// Tests that no standardized variation sequence is invalid. + /// + /// The validity of emoji variation sequences (i.e., presentation sequences) + /// is tested by [`no_invalid_presentation_sequence`]. + #[cfg(feature = "_test-unicode-conformance")] + #[test] + fn no_invalid_standardized_variation_sequence() { + let sequences = get_valid_standardized_variation_sequences(); + assert!( + are_all_variants_valid(ROOT, |c| { + if is_standardized_variation_sequence(c) { + sequences.contains(c) + } else { + true + } + }), + "invalid standardized variation sequence(s) (see list above)", + ) + } + /// https://www.unicode.org/reports/tr51/#def_text_presentation_selector. const TEXT_PRESENTATION_SELECTOR: char = '\u{FE0E}'; /// https://www.unicode.org/reports/tr51/#def_emoji_presentation_selector. const EMOJI_PRESENTATION_SELECTOR: char = '\u{FE0F}'; + /// Tests whether a string is a text presentation sequence. + fn is_text_presentation_sequence(s: &str) -> bool { + s.contains(TEXT_PRESENTATION_SELECTOR) + } + + /// Tests whether a string is an emoji presentation sequence. + fn is_emoji_presentation_sequence(s: &str) -> bool { + s.contains(EMOJI_PRESENTATION_SELECTOR) + } + + /// Tests whether a string is a presentation sequence. + fn is_presentation_sequence(s: &str) -> bool { + is_text_presentation_sequence(s) || is_emoji_presentation_sequence(s) + } + #[test] fn symbols_are_not_emojis() { assert!( - are_all_variants_valid( - SYM, - |c| !c.contains(EMOJI_PRESENTATION_SELECTOR), - ) , + are_all_variants_valid(SYM, |c| !is_emoji_presentation_sequence(c)), "unexpected use of emoji presentation selector in `sym` (see list above)", ) } @@ -210,36 +289,20 @@ mod test { #[test] fn emojis_are_not_text() { assert!( - are_all_variants_valid( - EMOJI, - |c| !c.contains(TEXT_PRESENTATION_SELECTOR), - ) , + are_all_variants_valid(EMOJI, |c| !is_text_presentation_sequence(c)), "unexpected use of text presentation selector in `emoji` (see list above)", ) } - /// Returns the list of presentation sequences defined by Unicode. + /// Returns the set of presentation sequences defined by Unicode. /// /// See: https://www.unicode.org/reports/tr51/#Emoji_Variation_Sequences. #[cfg(feature = "_test-unicode-conformance")] fn get_valid_presentation_sequences() -> HashSet { - include_str!(concat!(env!("OUT_DIR"), "/emoji-variation-sequences.txt")) - .lines() - .filter_map(|l| { - let line = l.split('#').next().unwrap_or(l); - (!line.is_empty()).then_some(line) - }) - .map(|line| { - line.split(';') - .next() - .unwrap() - .split_whitespace() - .map(|cp| { - char::from_u32(u32::from_str_radix(cp, 0x10).unwrap()).unwrap() - }) - .collect() - }) - .collect() + read_sequences(include_str!(concat!( + env!("OUT_DIR"), + "/emoji-variation-sequences.txt", + ))) } #[cfg(feature = "_test-unicode-conformance")] @@ -248,9 +311,7 @@ mod test { let sequences = get_valid_presentation_sequences(); assert!( are_all_variants_valid(ROOT, |c| { - if c.contains(TEXT_PRESENTATION_SELECTOR) - || c.contains(EMOJI_PRESENTATION_SELECTOR) - { + if is_presentation_sequence(c) { sequences.contains(c) } else { true @@ -269,10 +330,11 @@ mod test { .collect::>(); assert!( are_all_variants_valid(SYM, |c| { - // All emoji variation sequences are exactly 2 codepoints long - // as of Unicode 16.0, so this doesn't miss anything. - !(c.chars().count() == 1 - && require_presentation_selector.contains(&c.chars().next().unwrap())) + if require_presentation_selector.contains(&c.chars().next().unwrap()) { + is_text_presentation_sequence(c) + } else { + true + } }), "missing text presentation selector(s) in `sym` (see list above)", ) @@ -287,10 +349,11 @@ mod test { .collect::>(); assert!( are_all_variants_valid(EMOJI, |c| { - // All emoji variation sequences are exactly 2 codepoints long - // as of Unicode 16.0, so this doesn't miss anything. - !(c.chars().count() == 1 - && require_presentation_selector.contains(&c.chars().next().unwrap())) + if require_presentation_selector.contains(&c.chars().next().unwrap()) { + is_emoji_presentation_sequence(c) + } else { + true + } }), "missing emoji presentation selector(s) in `emoji` (see list above)", )