diff --git a/build.rs b/build.rs
index 87fdfd2..3a4ea4a 100644
--- a/build.rs
+++ b/build.rs
@@ -1,320 +1,370 @@
-use self::shared::ModifierSet;
-use std::fmt::Write;
-use std::iter;
-use std::iter::Peekable;
 use std::path::Path;
 
-type StrResult<T> = Result<T, String>;
-
 #[path = "src/shared.rs"]
 mod shared;
 
-/// A module of definitions.
-struct Module<'a>(Vec<(&'a str, Binding<'a>)>);
+fn main() {
+    println!("cargo::rerun-if-changed=build.rs");
+
+    let out = std::env::var_os("OUT_DIR").unwrap();
+    let out_path = Path::new(&out);
+
+    module_files::generate_modules(out_path);
 
-impl<'a> Module<'a> {
-    fn new(mut list: Vec<(&'a str, Binding<'a>)>) -> Self {
-        list.sort_by_key(|&(name, _)| name);
-        Self(list)
+    #[cfg(feature = "_test-unicode-conformance")]
+    {
+        const UNICODE_VERSION: &str = "16.0.0";
+        unicode_data::download_file(
+            UNICODE_VERSION,
+            "emoji/emoji-variation-sequences.txt",
+            out_path.join("emoji-variation-sequences.txt"),
+        );
+        unicode_data::download_file(
+            UNICODE_VERSION,
+            "StandardizedVariants.txt",
+            out_path.join("StandardizedVariants.txt"),
+        )
     }
 }
 
-/// A definition bound in a module, with metadata.
-struct Binding<'a> {
-    def: Def<'a>,
-    deprecation: Option<&'a str>,
-}
+mod module_files {
+    use super::shared::ModifierSet;
+    use std::fmt::Write;
+    use std::iter;
+    use std::iter::Peekable;
+    use std::path::Path;
 
-/// A definition in a module.
-enum Def<'a> {
-    Symbol(Symbol<'a>),
-    Module(Module<'a>),
-}
+    type StrResult<T> = Result<T, String>;
 
-/// A symbol, either a leaf or with modifiers with optional deprecation.
-enum Symbol<'a> {
-    Single(String),
-    Multi(Vec<(ModifierSet<&'a str>, String, Option<&'a str>)>),
-}
+    /// A module of definitions.
+    struct Module<'a>(Vec<(&'a str, Binding<'a>)>);
 
-/// A single line during parsing.
-#[derive(Debug, Clone)]
-enum Line<'a> {
-    Blank,
-    Deprecated(&'a str),
-    ModuleStart(&'a str),
-    ModuleEnd,
-    Symbol(&'a str, Option<String>),
-    Variant(ModifierSet<&'a str>, String),
-    Eof,
-}
+    impl<'a> Module<'a> {
+        fn new(mut list: Vec<(&'a str, Binding<'a>)>) -> Self {
+            list.sort_by_key(|&(name, _)| name);
+            Self(list)
+        }
+    }
 
-#[derive(Debug, Clone)]
-enum Declaration<'a> {
-    ModuleStart(&'a str, Option<&'a str>),
-    ModuleEnd,
-    Symbol(&'a str, Option<String>, Option<&'a str>),
-    Variant(ModifierSet<&'a str>, String, Option<&'a str>),
-}
+    /// A definition bound in a module, with metadata.
+    struct Binding<'a> {
+        def: Def<'a>,
+        deprecation: Option<&'a str>,
+    }
 
-fn main() {
-    println!("cargo::rerun-if-changed=build.rs");
+    /// A definition in a module.
+    enum Def<'a> {
+        Symbol(Symbol<'a>),
+        Module(Module<'a>),
+    }
 
-    let mut buf = String::new();
-    process(&mut buf, Path::new("src/modules/sym.txt"), "SYM", "Named general symbols.");
-    process(&mut buf, Path::new("src/modules/emoji.txt"), "EMOJI", "Named emoji.");
+    /// A symbol, either a leaf or with modifiers with optional deprecation.
+    enum Symbol<'a> {
+        Single(String),
+        Multi(Vec<(ModifierSet<&'a str>, String, Option<&'a str>)>),
+    }
 
-    let out = std::env::var_os("OUT_DIR").unwrap();
-    let dest = Path::new(&out).join("out.rs");
-    std::fs::write(&dest, buf).unwrap();
+    /// A single line during parsing.
+    #[derive(Debug, Clone)]
+    enum Line<'a> {
+        Blank,
+        Deprecated(&'a str),
+        ModuleStart(&'a str),
+        ModuleEnd,
+        Symbol(&'a str, Option<String>),
+        Variant(ModifierSet<&'a str>, String),
+        Eof,
+    }
 
-    #[cfg(feature = "_test-unicode-conformance")]
-    {
-        let emoji_vs_list = Path::new(&out).join("emoji-variation-sequences.txt");
-        if !std::fs::read_to_string(&emoji_vs_list)
-            .is_ok_and(|text| text.contains("Emoji Version 16.0"))
-        {
-            let content = ureq::get(
-                "https://www.unicode.org/Public/16.0.0/ucd/emoji/emoji-variation-sequences.txt",
-            )
-                .call()
-                .unwrap()
-                .body_mut()
-                .read_to_string()
-                .unwrap();
-            std::fs::write(emoji_vs_list, content).unwrap();
-        }
+    #[derive(Debug, Clone)]
+    enum Declaration<'a> {
+        ModuleStart(&'a str, Option<&'a str>),
+        ModuleEnd,
+        Symbol(&'a str, Option<String>, Option<&'a str>),
+        Variant(ModifierSet<&'a str>, String, Option<&'a str>),
     }
-}
 
-/// Processes a single file and turns it into a global module.
-fn process(buf: &mut String, file: &Path, name: &str, desc: &str) {
-    println!("cargo::rerun-if-changed={}", file.display());
-
-    let text = std::fs::read_to_string(file).unwrap();
-    let mut line_nr = 0;
-    let mut deprecation = None;
-    let mut iter = text
-        .lines()
-        .inspect(|_| line_nr += 1)
-        .map(tokenize)
-        .chain(iter::once(Ok(Line::Eof)))
-        .filter_map(|line| match line {
-            Err(message) => Some(Err(message)),
-            Ok(Line::Blank) => None,
-            Ok(Line::Deprecated(message)) => {
-                if deprecation.is_some() {
-                    Some(Err(String::from("duplicate `@deprecated:`")))
-                } else {
-                    deprecation = Some(message);
-                    None
+    /// Generates the `SYM` and `EMOJI` modules and writes them to `out.rs` in `out`.
+    pub(crate) fn generate_modules(out: &Path) {
+        let mut buf = String::new();
+
+        process(
+            &mut buf,
+            Path::new("src/modules/sym.txt"),
+            "SYM",
+            "Named general symbols.",
+        );
+
+        process(&mut buf, Path::new("src/modules/emoji.txt"), "EMOJI", "Named emoji.");
+
+        std::fs::write(out.join("out.rs"), buf).unwrap();
+    }
+
+    /// Processes a single file and turns it into a global module.
+    fn process(buf: &mut String, file: &Path, name: &str, desc: &str) {
+        println!("cargo::rerun-if-changed={}", file.display());
+
+        let text = std::fs::read_to_string(file).unwrap();
+        let mut line_nr = 0;
+        let mut deprecation = None;
+        let mut iter = text
+            .lines()
+            .inspect(|_| line_nr += 1)
+            .map(tokenize)
+            .chain(iter::once(Ok(Line::Eof)))
+            .filter_map(|line| match line {
+                Err(message) => Some(Err(message)),
+                Ok(Line::Blank) => None,
+                Ok(Line::Deprecated(message)) => {
+                    if deprecation.is_some() {
+                        Some(Err(String::from("duplicate `@deprecated:`")))
+                    } else {
+                        deprecation = Some(message);
+                        None
+                    }
                 }
-            }
-            Ok(Line::ModuleStart(name)) => {
-                Some(Ok(Declaration::ModuleStart(name, deprecation.take())))
-            }
-            Ok(Line::ModuleEnd) => {
-                if deprecation.is_some() {
-                    Some(Err(String::from("dangling `@deprecated:`")))
-                } else {
-                    Some(Ok(Declaration::ModuleEnd))
+                Ok(Line::ModuleStart(name)) => {
+                    Some(Ok(Declaration::ModuleStart(name, deprecation.take())))
                 }
-            }
-            Ok(Line::Symbol(name, value)) => {
-                Some(Ok(Declaration::Symbol(name, value, deprecation.take())))
-            }
-            Ok(Line::Variant(modifiers, value)) => {
-                Some(Ok(Declaration::Variant(modifiers, value, deprecation.take())))
-            }
-            Ok(Line::Eof) => {
-                deprecation.map(|_| Err(String::from("dangling `@deprecated:`")))
-            }
-        })
-        .peekable();
-
-    let module = match parse(&mut iter) {
-        Ok(defs) => Module::new(defs),
-        Err(e) => {
-            let message = format!("{}:{}: {e}", file.display(), line_nr);
-            println!("cargo::warning={message}");
-            std::process::exit(1);
-        }
-    };
-
-    write!(buf, "#[doc = {desc:?}] pub const {name}: Module = ").unwrap();
-    encode(buf, &module);
-    buf.push(';');
-}
+                Ok(Line::ModuleEnd) => {
+                    if deprecation.is_some() {
+                        Some(Err(String::from("dangling `@deprecated:`")))
+                    } else {
+                        Some(Ok(Declaration::ModuleEnd))
+                    }
+                }
+                Ok(Line::Symbol(name, value)) => {
+                    Some(Ok(Declaration::Symbol(name, value, deprecation.take())))
+                }
+                Ok(Line::Variant(modifiers, value)) => {
+                    Some(Ok(Declaration::Variant(modifiers, value, deprecation.take())))
+                }
+                Ok(Line::Eof) => {
+                    deprecation.map(|_| Err(String::from("dangling `@deprecated:`")))
+                }
+            })
+            .peekable();
 
-/// Tokenizes and classifies a line.
-fn tokenize(line: &str) -> StrResult<Line<'_>> {
-    // Strip comments.
-    let line = line.split_once("//").map_or(line, |(head, _)| head);
+        let module = match parse(&mut iter) {
+            Ok(defs) => Module::new(defs),
+            Err(e) => {
+                let message = format!("{}:{}: {e}", file.display(), line_nr);
+                println!("cargo::warning={message}");
+                std::process::exit(1);
+            }
+        };
 
-    // Ignore empty lines.
-    let line = line.trim();
-    if line.is_empty() {
-        return Ok(Line::Blank);
+        write!(buf, "#[doc = {desc:?}] pub const {name}: Module = ").unwrap();
+        encode(buf, &module);
+        buf.push(';');
     }
 
-    let (head, tail) = match line.split_once(' ') {
-        Some((a, b)) => (a, Some(b)),
-        None => (line, None),
-    };
-
-    Ok(if head == "@deprecated:" {
-        Line::Deprecated(tail.ok_or("missing deprecation message")?.trim())
-    } else if tail == Some("{") {
-        validate_ident(head)?;
-        Line::ModuleStart(head)
-    } else if head == "}" && tail.is_none() {
-        Line::ModuleEnd
-    } else if let Some(rest) = head.strip_prefix('.') {
-        for part in rest.split('.') {
-            validate_ident(part)?;
+    /// Tokenizes and classifies a line.
+    fn tokenize(line: &str) -> StrResult<Line<'_>> {
+        // Strip comments.
+        let line = line.split_once("//").map_or(line, |(head, _)| head);
+
+        // Ignore empty lines.
+        let line = line.trim();
+        if line.is_empty() {
+            return Ok(Line::Blank);
         }
-        let value = decode_value(tail.ok_or("missing char")?)?;
-        Line::Variant(ModifierSet::from_raw_dotted(rest), value)
-    } else {
-        validate_ident(head)?;
-        let value = tail.map(decode_value).transpose()?;
-        Line::Symbol(head, value)
-    })
-}
 
-/// Ensures that a string is a valid identifier. In `codex`, we use very strict
-/// rules and allow only alphabetic ASCII chars.
-fn validate_ident(string: &str) -> StrResult<()> {
-    if !string.is_empty() && string.chars().all(|c| c.is_ascii_alphabetic()) {
-        return Ok(());
-    }
-    Err(format!("invalid identifier: {string:?}"))
-}
+        let (head, tail) = match line.split_once(' ') {
+            Some((a, b)) => (a, Some(b)),
+            None => (line, None),
+        };
 
-/// Extracts the value of a variant, parsing `\u{XXXX}` and other escapes.
-fn decode_value(mut text: &str) -> StrResult<String> {
-    let mut result = String::new();
-    loop {
-        if let Some(rest) = text.strip_prefix("\\u{") {
-            let Some((code, tail)) = rest.split_once('}') else {
-                return Err(format!(
-                    "unclosed Unicode escape: \\u{{{}",
-                    rest.escape_debug()
-                ));
-            };
-            result.push(
-                u32::from_str_radix(code, 16)
-                    .ok()
-                    .and_then(|n| char::try_from(n).ok())
-                    .ok_or_else(|| format!("invalid Unicode escape \\u{{{code}}}"))?,
-            );
-            text = tail;
-        } else if let Some(rest) = text.strip_prefix("\\vs{") {
-            let Some((value, tail)) = rest.split_once('}') else {
-                return Err(format!("unclosed VS escape: \\vs{{{}", rest.escape_debug()));
-            };
-            let vs = match value {
-                "1" => '\u{fe00}',
-                "2" => '\u{fe01}',
-                "3" => '\u{fe02}',
-                "4" => '\u{fe03}',
-                "5" => '\u{fe04}',
-                "6" => '\u{fe05}',
-                "7" => '\u{fe06}',
-                "8" => '\u{fe07}',
-                "9" => '\u{fe08}',
-                "10" => '\u{fe09}',
-                "11" => '\u{fe0a}',
-                "12" => '\u{fe0b}',
-                "13" => '\u{fe0c}',
-                "14" => '\u{fe0d}',
-                "15" | "text" => '\u{fe0e}',
-                "16" | "emoji" => '\u{fe0f}',
-                code => return Err(format!("invalid VS escape: \\vs{{{code}}}")),
-            };
-            result.push(vs);
-            text = tail;
-        } else if let Some((prefix, tail)) = text.find('\\').map(|i| text.split_at(i)) {
-            if prefix.is_empty() {
-                return Err(format!("invalid escape sequence: {tail}"));
+        Ok(if head == "@deprecated:" {
+            Line::Deprecated(tail.ok_or("missing deprecation message")?.trim())
+        } else if tail == Some("{") {
+            validate_ident(head)?;
+            Line::ModuleStart(head)
+        } else if head == "}" && tail.is_none() {
+            Line::ModuleEnd
+        } else if let Some(rest) = head.strip_prefix('.') {
+            for part in rest.split('.') {
+                validate_ident(part)?;
             }
-            result.push_str(prefix);
-            text = tail;
+            let value = decode_value(tail.ok_or("missing char")?)?;
+            Line::Variant(ModifierSet::from_raw_dotted(rest), value)
         } else {
-            result.push_str(text);
-            return Ok(result);
+            validate_ident(head)?;
+            let value = tail.map(decode_value).transpose()?;
+            Line::Symbol(head, value)
+        })
+    }
+
+    /// Ensures that a string is a valid identifier. In `codex`, we use very strict
+    /// rules: it must be non-empty and consist only of alphabetic ASCII chars.
+    fn validate_ident(string: &str) -> StrResult<()> {
+        if !string.is_empty() && string.chars().all(|c| c.is_ascii_alphabetic()) {
+            return Ok(());
         }
+        Err(format!("invalid identifier: {string:?}"))
     }
-}
 
-/// Turns a stream of lines into a list of definitions.
-fn parse<'a>(
-    p: &mut Peekable<impl Iterator<Item = StrResult<Declaration<'a>>>>,
-) -> StrResult<Vec<(&'a str, Binding<'a>)>> {
-    let mut defs = vec![];
-    loop {
-        match p.next().transpose()? {
-            None | Some(Declaration::ModuleEnd) => {
-                break;
-            }
-            Some(Declaration::Symbol(name, value, deprecation)) => {
-                let mut variants = vec![];
-                while let Some(Declaration::Variant(name, value, deprecation)) =
-                    p.peek().cloned().transpose()?
-                {
-                    variants.push((name, value, deprecation));
-                    p.next();
+    /// Decodes a value, expanding `\u{XXXX}` and `\vs{..}` escapes; other `\` are errors.
+    fn decode_value(mut text: &str) -> StrResult<String> {
+        let mut result = String::new();
+        loop {
+            if let Some(rest) = text.strip_prefix("\\u{") {
+                let Some((code, tail)) = rest.split_once('}') else {
+                    return Err(format!(
+                        "unclosed Unicode escape: \\u{{{}",
+                        rest.escape_debug()
+                    ));
+                };
+                result.push(
+                    u32::from_str_radix(code, 16)
+                        .ok()
+                        .and_then(|n| char::try_from(n).ok())
+                        .ok_or_else(|| format!("invalid Unicode escape \\u{{{code}}}"))?,
+                );
+                text = tail;
+            } else if let Some(rest) = text.strip_prefix("\\vs{") {
+                let Some((value, tail)) = rest.split_once('}') else {
+                    return Err(format!(
+                        "unclosed VS escape: \\vs{{{}",
+                        rest.escape_debug(),
+                    ));
+                };
+                let vs = match value {
+                    "1" => '\u{fe00}',
+                    "2" => '\u{fe01}',
+                    "3" => '\u{fe02}',
+                    "4" => '\u{fe03}',
+                    "5" => '\u{fe04}',
+                    "6" => '\u{fe05}',
+                    "7" => '\u{fe06}',
+                    "8" => '\u{fe07}',
+                    "9" => '\u{fe08}',
+                    "10" => '\u{fe09}',
+                    "11" => '\u{fe0a}',
+                    "12" => '\u{fe0b}',
+                    "13" => '\u{fe0c}',
+                    "14" => '\u{fe0d}',
+                    "15" | "text" => '\u{fe0e}', // text presentation selector
+                    "16" | "emoji" => '\u{fe0f}', // emoji presentation selector
+                    code => return Err(format!("invalid VS escape: \\vs{{{code}}}")),
+                };
+                result.push(vs);
+                text = tail;
+            } else if let Some((prefix, tail)) = text.find('\\').map(|i| text.split_at(i))
+            {
+                if prefix.is_empty() {
+                    return Err(format!("invalid escape sequence: {tail}"));
                 }
+                result.push_str(prefix);
+                text = tail;
+            } else {
+                result.push_str(text);
+                return Ok(result);
+            }
+        }
+    }
 
-                let symbol = if !variants.is_empty() {
-                    if let Some(value) = value {
-                        variants.insert(0, (ModifierSet::default(), value, None));
+    /// Turns a stream of lines into a list of definitions.
+    fn parse<'a>(
+        p: &mut Peekable<impl Iterator<Item = StrResult<Declaration<'a>>>>,
+    ) -> StrResult<Vec<(&'a str, Binding<'a>)>> {
+        let mut defs = vec![];
+        loop {
+            match p.next().transpose()? {
+                None | Some(Declaration::ModuleEnd) => {
+                    break;
+                }
+                Some(Declaration::Symbol(name, value, deprecation)) => {
+                    let mut variants = vec![];
+                    while let Some(Declaration::Variant(name, value, deprecation)) =
+                        p.peek().cloned().transpose()?
+                    {
+                        variants.push((name, value, deprecation));
+                        p.next();
                     }
-                    Symbol::Multi(variants)
-                } else {
-                    let value = value.ok_or("symbol needs char or variants")?;
-                    Symbol::Single(value)
-                };
 
-                defs.push((name, Binding { def: Def::Symbol(symbol), deprecation }));
-            }
-            Some(Declaration::ModuleStart(name, deprecation)) => {
-                let module_defs = parse(p)?;
-                defs.push((
-                    name,
-                    Binding {
-                        def: Def::Module(Module::new(module_defs)),
-                        deprecation,
-                    },
-                ));
+                    let symbol = if !variants.is_empty() {
+                        if let Some(value) = value {
+                            variants.insert(0, (ModifierSet::default(), value, None));
+                        }
+                        Symbol::Multi(variants)
+                    } else {
+                        let value = value.ok_or("symbol needs char or variants")?;
+                        Symbol::Single(value)
+                    };
+
+                    defs.push((name, Binding { def: Def::Symbol(symbol), deprecation }));
+                }
+                Some(Declaration::ModuleStart(name, deprecation)) => {
+                    let module_defs = parse(p)?;
+                    defs.push((
+                        name,
+                        Binding {
+                            def: Def::Module(Module::new(module_defs)),
+                            deprecation,
+                        },
+                    ));
+                }
+                other => return Err(format!("expected definition, found {other:?}")),
             }
-            other => return Err(format!("expected definition, found {other:?}")),
         }
+        Ok(defs)
     }
-    Ok(defs)
-}
 
-/// Encodes a `Module` into Rust code.
-fn encode(buf: &mut String, module: &Module) {
-    buf.push_str("Module(&[");
-    for (name, entry) in &module.0 {
-        write!(buf, "({name:?}, Binding {{ def: ").unwrap();
-        match &entry.def {
-            Def::Module(module) => {
-                buf.push_str("Def::Module(");
-                encode(buf, module);
-                buf.push(')');
-            }
-            Def::Symbol(symbol) => {
-                buf.push_str("Def::Symbol(Symbol::");
-                match symbol {
-                    Symbol::Single(value) => write!(buf, "Single({value:?})").unwrap(),
-                    Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(),
+    /// Encodes a `Module` into Rust code.
+    fn encode(buf: &mut String, module: &Module) {
+        buf.push_str("Module(&[");
+        for (name, entry) in &module.0 {
+            write!(buf, "({name:?}, Binding {{ def: ").unwrap();
+            match &entry.def {
+                Def::Module(module) => {
+                    buf.push_str("Def::Module(");
+                    encode(buf, module);
+                    buf.push(')');
+                }
+                Def::Symbol(symbol) => {
+                    buf.push_str("Def::Symbol(Symbol::");
+                    match symbol {
+                        Symbol::Single(value) => {
+                            write!(buf, "Single({value:?})").unwrap()
+                        }
+                        Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(),
+                    }
+                    buf.push(')');
                 }
-                buf.push(')');
             }
+            write!(buf, ", deprecation: {:?} }}),", entry.deprecation).unwrap();
+        }
+        buf.push_str("])");
+    }
+}
+
+#[cfg(feature = "_test-unicode-conformance")]
+mod unicode_data {
+    use std::path::Path;
+
+    /// Downloads `source` for the given Unicode version to `local_path`, unless cached.
+    pub fn download_file(
+        unicode_version: &str,
+        source: &str,
+        local_path: impl AsRef<Path>,
+    ) {
+        // This marker line is prepended to the downloaded file. A file that is
+        // missing or does not start with it is (re-)downloaded. The marker ends in
+        // a newline so it stays on its own line and is compared in full.
+        let marker = format!("##CODEX# Unicode version: {unicode_version}\n");
+        if !std::fs::read_to_string(&local_path)
+            .is_ok_and(|text| text.starts_with(&marker))
+        {
+            let content = ureq::get(format!(
+                "https://www.unicode.org/Public/{unicode_version}/ucd/{source}"
+            ))
+            .call()
+            .unwrap()
+            .body_mut()
+            .read_to_string()
+            .unwrap();
+            std::fs::write(local_path, marker + &content).unwrap();
+        }
-        write!(buf, ", deprecation: {:?} }}),", entry.deprecation).unwrap();
     }
-    buf.push_str("])");
 }
diff --git a/src/lib.rs b/src/lib.rs
index cd401ac..36d9971 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -191,18 +191,97 @@ mod test {
         }
     }
 
+    /// Returns the sequences listed in `source`: per line, the hex code points before `;`.
+    #[cfg(feature = "_test-unicode-conformance")]
+    fn read_sequences(source: &str) -> HashSet<String> {
+        source
+            .lines()
+            .filter_map(|l| {
+                let line = l.split('#').next().unwrap_or(l); // drop `#` comments
+                (!line.is_empty()).then_some(line)
+            })
+            .map(|line| {
+                line.split(';')
+                    .next()
+                    .unwrap()
+                    .split_whitespace()
+                    .map(|cp| {
+                        char::from_u32(u32::from_str_radix(cp, 0x10).unwrap()).unwrap()
+                    })
+                    .collect()
+            })
+            .collect()
+    }
+
+    /// Returns the set of standardized variation sequences defined by Unicode,
+    /// as listed in the `StandardizedVariants.txt` found in `OUT_DIR`.
+    ///
+    /// Emoji variation sequences ("presentation sequences") are not included.
+    #[cfg(feature = "_test-unicode-conformance")]
+    fn get_valid_standardized_variation_sequences() -> HashSet<String> {
+        read_sequences(include_str!(concat!(
+            env!("OUT_DIR"),
+            "/StandardizedVariants.txt",
+        )))
+    }
+
+    /// Tests whether a string contains a variation selector in U+FE00..=U+FE0D.
+    ///
+    /// Presentation selectors (U+FE0E, U+FE0F) are not matched, so emoji variation
+    /// sequences are excluded. Use [`is_presentation_sequence`] to test whether a
+    /// string is a presentation sequence.
+    fn is_standardized_variation_sequence(s: &str) -> bool {
+        // Non-specific variation selectors from
+        // https://unicode.org/charts/PDF/UFE00.pdf.
+        (0xFE00..=0xFE0D)
+            .map(|cp| char::from_u32(cp).unwrap())
+            .any(|vs| s.contains(vs))
+    }
+
+    /// Tests that every `ROOT` variant matched by
+    /// [`is_standardized_variation_sequence`] is a sequence defined by Unicode.
+    /// The validity of emoji variation sequences (i.e., presentation sequences)
+    /// is tested by [`no_invalid_presentation_sequence`].
+    #[cfg(feature = "_test-unicode-conformance")]
+    #[test]
+    fn no_invalid_standardized_variation_sequence() {
+        let sequences = get_valid_standardized_variation_sequences();
+        assert!(
+            are_all_variants_valid(ROOT, |c| {
+                if is_standardized_variation_sequence(c) {
+                    sequences.contains(c)
+                } else {
+                    true
+                }
+            }),
+            "invalid standardized variation sequence(s) (see list above)",
+        )
+    }
+
     /// https://www.unicode.org/reports/tr51/#def_text_presentation_selector.
     const TEXT_PRESENTATION_SELECTOR: char = '\u{FE0E}';
     /// https://www.unicode.org/reports/tr51/#def_emoji_presentation_selector.
     const EMOJI_PRESENTATION_SELECTOR: char = '\u{FE0F}';
 
+    /// Tests whether a string contains the text presentation selector (U+FE0E).
+    fn is_text_presentation_sequence(s: &str) -> bool {
+        s.contains(TEXT_PRESENTATION_SELECTOR)
+    }
+
+    /// Tests whether a string contains the emoji presentation selector (U+FE0F).
+    fn is_emoji_presentation_sequence(s: &str) -> bool {
+        s.contains(EMOJI_PRESENTATION_SELECTOR)
+    }
+
+    /// Tests whether a string contains a text or emoji presentation selector.
+    fn is_presentation_sequence(s: &str) -> bool {
+        is_text_presentation_sequence(s) || is_emoji_presentation_sequence(s)
+    }
+
     #[test]
     fn symbols_are_not_emojis() {
         assert!(
-            are_all_variants_valid(
-                SYM,
-                |c| !c.contains(EMOJI_PRESENTATION_SELECTOR),
-            ) ,
+            are_all_variants_valid(SYM, |c| !is_emoji_presentation_sequence(c)),
             "unexpected use of emoji presentation selector in `sym` (see list above)",
         )
     }
@@ -210,36 +289,20 @@ mod test {
     #[test]
     fn emojis_are_not_text() {
         assert!(
-            are_all_variants_valid(
-                EMOJI,
-                |c| !c.contains(TEXT_PRESENTATION_SELECTOR),
-            ) ,
+            are_all_variants_valid(EMOJI, |c| !is_text_presentation_sequence(c)),
             "unexpected use of text presentation selector in `emoji` (see list above)",
         )
     }
 
-    /// Returns the list of presentation sequences defined by Unicode.
+    /// Returns the set of presentation sequences defined by Unicode.
     ///
     /// See: https://www.unicode.org/reports/tr51/#Emoji_Variation_Sequences.
     #[cfg(feature = "_test-unicode-conformance")]
     fn get_valid_presentation_sequences() -> HashSet<String> {
-        include_str!(concat!(env!("OUT_DIR"), "/emoji-variation-sequences.txt"))
-            .lines()
-            .filter_map(|l| {
-                let line = l.split('#').next().unwrap_or(l);
-                (!line.is_empty()).then_some(line)
-            })
-            .map(|line| {
-                line.split(';')
-                    .next()
-                    .unwrap()
-                    .split_whitespace()
-                    .map(|cp| {
-                        char::from_u32(u32::from_str_radix(cp, 0x10).unwrap()).unwrap()
-                    })
-                    .collect()
-            })
-            .collect()
+        read_sequences(include_str!(concat!(
+            env!("OUT_DIR"),
+            "/emoji-variation-sequences.txt",
+        )))
     }
 
     #[cfg(feature = "_test-unicode-conformance")]
@@ -248,9 +311,7 @@ mod test {
         let sequences = get_valid_presentation_sequences();
         assert!(
             are_all_variants_valid(ROOT, |c| {
-                if c.contains(TEXT_PRESENTATION_SELECTOR)
-                    || c.contains(EMOJI_PRESENTATION_SELECTOR)
-                {
+                if is_presentation_sequence(c) {
                     sequences.contains(c)
                 } else {
                     true
@@ -269,10 +330,11 @@ mod test {
             .collect::<HashSet<_>>();
         assert!(
             are_all_variants_valid(SYM, |c| {
-                // All emoji variation sequences are exactly 2 codepoints long
-                // as of Unicode 16.0, so this doesn't miss anything.
-                !(c.chars().count() == 1
-                    && require_presentation_selector.contains(&c.chars().next().unwrap()))
+                if require_presentation_selector.contains(&c.chars().next().unwrap()) {
+                    is_text_presentation_sequence(c)
+                } else {
+                    true
+                }
             }),
             "missing text presentation selector(s) in `sym` (see list above)",
         )
@@ -287,10 +349,11 @@ mod test {
             .collect::<HashSet<_>>();
         assert!(
             are_all_variants_valid(EMOJI, |c| {
-                // All emoji variation sequences are exactly 2 codepoints long
-                // as of Unicode 16.0, so this doesn't miss anything.
-                !(c.chars().count() == 1
-                    && require_presentation_selector.contains(&c.chars().next().unwrap()))
+                if require_presentation_selector.contains(&c.chars().next().unwrap()) {
+                    is_emoji_presentation_sequence(c)
+                } else {
+                    true
+                }
             }),
             "missing emoji presentation selector(s) in `emoji` (see list above)",
         )