@@ -161,9 +161,6 @@ TEST(Regex_matches, match_false_6) {
161161}
162162
163163TEST (Regex_matches, unicode_range_arabic_indic_digit) {
164- // U+0660-U+0669 are Arabic-Indic digits
165- // With /u flag: [\u0660-\u0669] would match any Arabic-Indic digit
166- // Without /u flag: treated as UTF-8 bytes, requires complex pattern
167164 const auto regex{
168165 sourcemeta::core::to_regex<std::string>(" [\\ u0660-\\ u0669]" )};
169166 EXPECT_TRUE (regex.has_value ());
@@ -173,9 +170,6 @@ TEST(Regex_matches, unicode_range_arabic_indic_digit) {
173170}
174171
175172TEST (Regex_matches, unicode_range_4byte_deseret) {
176- // U+10400-U+1044F are Deseret letters (4-byte UTF-8)
177- // With /u flag: [\u{10400}-\u{1044F}] would work
178- // Without /u flag: cannot express this range simply
179173 const auto regex{
180174 sourcemeta::core::to_regex<std::string>(" [\\ u{10400}-\\ u{1044F}]" )};
181175 EXPECT_TRUE (regex.has_value ());
@@ -187,8 +181,6 @@ TEST(Regex_matches, unicode_range_4byte_deseret) {
187181}
188182
189183TEST (Regex_matches, unicode_property_letter) {
190- // With /u flag and Unicode property escapes: \p{Letter} matches any letter
191- // This would drastically simplify XML Name validation
192184 const auto regex{sourcemeta::core::to_regex<std::string>(
193185 " ^\\ p{Letter}[\\ p{Letter}\\ p{Number}]*$" )};
194186 EXPECT_TRUE (regex.has_value ());
@@ -204,8 +196,6 @@ TEST(Regex_matches, unicode_property_letter) {
204196}
205197
206198TEST (Regex_matches, unicode_property_exclude_digit) {
207- // With Unicode properties, we could exclude digits from start position
208- // while allowing them in subsequent positions - exactly what XML NCName needs
209199 const auto regex{sourcemeta::core::to_regex<std::string>(
210200 " ^(?!\\ p{Number})\\ p{Letter}[\\ p{Letter}\\ p{Number}-_.]*$" )};
211201 EXPECT_TRUE (regex.has_value ());
@@ -220,8 +210,6 @@ TEST(Regex_matches, unicode_property_exclude_digit) {
220210}
221211
222212TEST (Regex_matches, unicode_dot_matches_codepoint) {
223- // With /u flag, . matches one Unicode codepoint (including 4-byte chars)
224- // Without /u flag, . matches one byte
225213 const auto regex{sourcemeta::core::to_regex<std::string>(" ^.$" )};
226214 EXPECT_TRUE (regex.has_value ());
227215 EXPECT_TRUE (sourcemeta::core::matches<std::string>(regex.value (), " A" ));
@@ -232,8 +220,6 @@ TEST(Regex_matches, unicode_dot_matches_codepoint) {
232220}
233221
234222TEST (Regex_matches, unicode_quantifier_on_codepoints) {
235- // With /u flag, quantifiers work on Unicode codepoints
236- // Without /u flag, quantifiers work on bytes
237223 const auto regex{sourcemeta::core::to_regex<std::string>(" ^.{3}$" )};
238224 EXPECT_TRUE (regex.has_value ());
239225 EXPECT_TRUE (sourcemeta::core::matches<std::string>(regex.value (), " ABC" ));
@@ -245,7 +231,6 @@ TEST(Regex_matches, unicode_quantifier_on_codepoints) {
245231
246232TEST (Regex_matches, digit_ascii_only) {
247233 // \d should only match ASCII digits 0-9, not Unicode digits
248- // From: https://github.com/json-schema-org/JSON-Schema-Test-Suite
249234 const auto regex{sourcemeta::core::to_regex<std::string>(" ^\\ d$" )};
250235 EXPECT_TRUE (regex.has_value ());
251236 EXPECT_TRUE (sourcemeta::core::matches<std::string>(regex.value (), " 0" ));
@@ -289,14 +274,10 @@ TEST(Regex_matches, nonbmp_literal_match) {
289274}
290275
291276TEST (Regex_matches, xml_ncname_simplified) {
292- // Current: 722KB exhaustive byte pattern
293- // With Unicode: ~50 bytes using property escapes
294- // NCName = letter (not colon) followed by letters/digits/punctuation
295277 const auto regex{sourcemeta::core::to_regex<std::string>(
296278 " ^(?![:\\ p{Nd}])[\\ p{L}_][\\ p{L}\\ p{Nd}\\ -._·]*$" )};
297279 EXPECT_TRUE (regex.has_value ());
298280
299- // Valid NCNames
300281 EXPECT_TRUE (sourcemeta::core::matches<std::string>(regex.value (), " element" ));
301282 EXPECT_TRUE (
302283 sourcemeta::core::matches<std::string>(regex.value (), " _element" ));
@@ -307,8 +288,6 @@ TEST(Regex_matches, xml_ncname_simplified) {
307288 sourcemeta::core::matches<std::string>(regex.value (), " element٠" ));
308289 EXPECT_TRUE (sourcemeta::core::matches<std::string>(regex.value (),
309290 " \U00010400 element" ));
310-
311- // Invalid NCNames
312291 EXPECT_FALSE (
313292 sourcemeta::core::matches<std::string>(regex.value (), " :element" ));
314293 EXPECT_FALSE (
0 commit comments