Skip to content

Commit 7a4a504

Browse files
committed
implement generate_pattern_string
1 parent fb58702 commit 7a4a504

File tree

3 files changed

+187
-12
lines changed

3 files changed

+187
-12
lines changed

Diff for: include/ada/url_pattern-inl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ Tokenizer::process_tokenizing_error(size_t next_position,
411411
}
412412

413413
// @see https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
414-
inline bool Tokenizer::is_valid_name_code_point(char cp, bool first) {
414+
inline bool is_valid_name_code_point(char cp, bool first) {
415415
// If first is true return the result of checking if code point is contained
416416
// in the IdentifierStart set of code points. Otherwise return the result of
417417
// checking if code point is contained in the IdentifierPart set of code

Diff for: include/ada/url_pattern.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ struct url_pattern_init {
100100

101101
enum class url_pattern_part_type : uint8_t {
102102
// The part represents a simple fixed text string.
103-
FIXED_TEST,
103+
FIXED_TEXT,
104104
// The part represents a matching group with a custom regular expression.
105105
REGEXP,
106106
// The part represents a matching group that matches code points up to the
@@ -361,9 +361,6 @@ class Tokenizer {
361361
tl::expected<void, url_pattern_errors> process_tokenizing_error(
362362
size_t next_position, size_t value_position);
363363

364-
// @see https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
365-
bool is_valid_name_code_point(char code_point, bool first);
366-
367364
// has an associated input, a pattern string, initially the empty string.
368365
std::string input{};
369366
// has an associated policy, a tokenize policy, initially "strict".
@@ -573,6 +570,9 @@ std::string convert_modifier_to_string(url_pattern_part_modifier modifier);
573570
std::string generate_segment_wildcard_regexp(
574571
url_pattern_compile_component_options options);
575572

573+
// @see https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
574+
bool is_valid_name_code_point(char code_point, bool first);
575+
576576
} // namespace url_pattern_helpers
577577

578578
} // namespace ada

Diff for: src/url_pattern.cpp

+182-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "ada.h"
22

33
#include <optional>
4+
#include <ranges>
45
#include <regex>
56
#include <string>
67

@@ -926,7 +927,7 @@ std::vector<Token> tokenize(std::string_view input, token_policy policy) {
926927
bool first_code_point = name_position == name_start;
927928
// Let valid code point be the result of running is a valid name code
928929
// point given tokenizer’s code point and first code point.
929-
auto valid_code_point = tokenizer.is_valid_name_code_point(
930+
auto valid_code_point = is_valid_name_code_point(
930931
tokenizer.code_point.at(0), first_code_point);
931932
// If valid code point is false break.
932933
if (!valid_code_point) break;
@@ -1154,7 +1155,7 @@ std::string escape_regexp_string(std::string_view input) {
11541155
for (const auto& c : input) {
11551156
// TODO: Optimize this even further
11561157
if (should_escape_regexp_char(c)) {
1157-
result.append("\\" + c);
1158+
result.append(std::string("\\") + c);
11581159
} else {
11591160
result.push_back(c);
11601161
}
@@ -1208,10 +1209,184 @@ std::vector<url_pattern_part> parse_pattern_string(
12081209
std::string generate_pattern_string(
12091210
std::vector<url_pattern_part>& part_list,
12101211
url_pattern_compile_component_options& options) {
1211-
(void)part_list;
1212-
(void)options;
1213-
// TODO: Implement this
1214-
return {};
1212+
// Let result be the empty string.
1213+
std::string result{};
1214+
// Let index list be the result of getting the indices for part list.
1215+
// For each index of index list:
1216+
for (size_t index : std::views::iota(size_t{0}, part_list.size())) {
1217+
// Let part be part list[index].
1218+
auto part = part_list[index];
1219+
// Let previous part be part list[index - 1] if index is greater than 0,
1220+
// otherwise let it be null.
1221+
// TODO: Optimization opportunity. Find a way to avoid making a copy here.
1222+
std::optional<url_pattern_part> previous_part =
1223+
index == 0 ? std::nullopt : std::optional(part_list.at(index - 1));
1224+
// Let next part be part list[index + 1] if index is less than index list’s
1225+
// size - 1, otherwise let it be null.
1226+
std::optional<url_pattern_part> next_part =
1227+
index < part_list.size() - 1 ? std::optional(part_list.at(index + 1))
1228+
: std::nullopt;
1229+
// If part’s type is "fixed-text" then:
1230+
if (part.type == url_pattern_part_type::FIXED_TEXT) {
1231+
// If part’s modifier is "none" then:
1232+
if (part.modifier == url_pattern_part_modifier::NONE) {
1233+
// Append the result of running escape a pattern string given part’s
1234+
// value to the end of result.
1235+
result.append(escape_pattern(part.value));
1236+
continue;
1237+
}
1238+
// Append "{" to the end of result.
1239+
result += "{";
1240+
// Append the result of running escape a pattern string given part’s value
1241+
// to the end of result.
1242+
result.append(escape_pattern(part.value));
1243+
// Append "}" to the end of result.
1244+
result += "}";
1245+
// Append the result of running convert a modifier to a string given
1246+
// part’s modifier to the end of result.
1247+
result.append(convert_modifier_to_string(part.modifier));
1248+
continue;
1249+
}
1250+
// Let custom name be true if part’s name[0] is not an ASCII digit;
1251+
// otherwise false.
1252+
// TODO: Optimization opportunity: Find a way to directly check
1253+
// is_ascii_digit.
1254+
bool custom_name = idna::is_ascii(std::string_view(part.name.data(), 1));
1255+
// Let needs grouping be true if at least one of the following are true,
1256+
// otherwise let it be false:
1257+
// - part’s suffix is not the empty string.
1258+
// - part’s prefix is not the empty string and is not options’s prefix code
1259+
// point.
1260+
// TODO: part.prefix is a string, but options.prefix is a char. Which one is
1261+
// true?
1262+
bool needs_grouping =
1263+
!part.suffix.empty() ||
1264+
(!part.prefix.empty() && part.prefix[0] != options.prefix);
1265+
1266+
// If all of the following are true:
1267+
// - needs grouping is false; and
1268+
// - custom name is true; and
1269+
// - part’s type is "segment-wildcard"; and
1270+
// - part’s modifier is "none"; and
1271+
// - next part is not null; and
1272+
// - next part’s prefix is the empty string; and
1273+
// - next part’s suffix is the empty string
1274+
if (!needs_grouping && custom_name &&
1275+
part.type == url_pattern_part_type::SEGMENT_WILDCARD &&
1276+
part.modifier == url_pattern_part_modifier::NONE &&
1277+
next_part.has_value() && next_part->prefix.empty() &&
1278+
next_part->suffix.empty()) {
1279+
// If next part’s type is "fixed-text":
1280+
if (next_part->type == url_pattern_part_type::FIXED_TEXT) {
1281+
// Set needs grouping to true if the result of running is a valid name
1282+
// code point given next part’s value's first code point and the boolean
1283+
// false is true.
1284+
// TODO: Implement this.
1285+
} else {
1286+
// Set needs grouping to true if next part’s name[0] is an ASCII digit.
1287+
needs_grouping =
1288+
idna::is_ascii(std::string_view(next_part->name.data(), 1));
1289+
}
1290+
}
1291+
1292+
// If all of the following are true:
1293+
// - needs grouping is false; and
1294+
// - part’s prefix is the empty string; and
1295+
// - previous part is not null; and
1296+
// - previous part’s type is "fixed-text"; and
1297+
// - previous part’s value's last code point is options’s prefix code point.
1298+
// then set needs grouping to true.
1299+
if (!needs_grouping && part.prefix.empty() && previous_part.has_value() &&
1300+
previous_part->type == url_pattern_part_type::FIXED_TEXT &&
1301+
previous_part->value.at(previous_part->value.size() - 1) ==
1302+
options.prefix.value()) {
1303+
needs_grouping = true;
1304+
}
1305+
1306+
// Assert: part’s name is not the empty string or null.
1307+
ADA_ASSERT_TRUE(!part.name.empty());
1308+
1309+
// If needs grouping is true, then append "{" to the end of result.
1310+
if (needs_grouping) {
1311+
result.append("{");
1312+
}
1313+
1314+
// Append the result of running escape a pattern string given part’s prefix
1315+
// to the end of result.
1316+
result.append(escape_pattern(part.prefix));
1317+
1318+
// If custom name is true:
1319+
if (custom_name) {
1320+
// Append ":" to the end of result.
1321+
result.append(":");
1322+
// Append part’s name to the end of result.
1323+
result.append(part.name);
1324+
}
1325+
1326+
// If part’s type is "regexp" then:
1327+
if (part.type == url_pattern_part_type::REGEXP) {
1328+
// Append "(" to the end of result.
1329+
result.append("(");
1330+
// Append part’s value to the end of result.
1331+
result.append(part.value);
1332+
// Append ")" to the end of result.
1333+
result.append(")");
1334+
} else if (part.type == url_pattern_part_type::SEGMENT_WILDCARD) {
1335+
// Otherwise if part’s type is "segment-wildcard" and custom name is
1336+
// false: Append "(" to the end of result.
1337+
result.append("(");
1338+
// Append the result of running generate a segment wildcard regexp given
1339+
// options to the end of result.
1340+
result.append(generate_segment_wildcard_regexp(options));
1341+
// Append ")" to the end of result.
1342+
result.append(")");
1343+
} else if (part.type == url_pattern_part_type::FULL_WILDCARD) {
1344+
// Otherwise if part’s type is "full-wildcard":
1345+
// If custom name is false and one of the following is true:
1346+
// - previous part is null; or
1347+
// - previous part’s type is "fixed-text"; or
1348+
// - previous part’s modifier is not "none"; or
1349+
// - needs grouping is true; or
1350+
// - part’s prefix is not the empty string
1351+
// - then append "*" to the end of result.
1352+
if (!custom_name &&
1353+
(!previous_part.has_value() ||
1354+
previous_part->type == url_pattern_part_type::FIXED_TEXT ||
1355+
previous_part->modifier != url_pattern_part_modifier::NONE ||
1356+
needs_grouping || !part.prefix.empty())) {
1357+
result.append("*");
1358+
} else {
1359+
// Append "(" to the end of result.
1360+
// Append full wildcard regexp value to the end of result.
1361+
// Append ")" to the end of result.
1362+
result.append("(.*)");
1363+
}
1364+
}
1365+
1366+
// If all of the following are true:
1367+
// - part’s type is "segment-wildcard"; and
1368+
// - custom name is true; and
1369+
// - part’s suffix is not the empty string; and
1370+
// - The result of running is a valid name code point given part’s suffix's
1371+
// first code point and the boolean false is true then append U+005C (\) to
1372+
// the end of result.
1373+
if (part.type == url_pattern_part_type::SEGMENT_WILDCARD && custom_name &&
1374+
!part.suffix.empty() &&
1375+
is_valid_name_code_point(part.suffix[0], true)) {
1376+
result.append("\\");
1377+
}
1378+
1379+
// Append the result of running escape a pattern string given part’s suffix
1380+
// to the end of result.
1381+
result.append(escape_pattern(part.suffix));
1382+
// If needs grouping is true, then append "}" to the end of result.
1383+
if (needs_grouping) result.append("}");
1384+
// Append the result of running convert a modifier to a string given part’s
1385+
// modifier to the end of result.
1386+
result.append(convert_modifier_to_string(part.modifier));
1387+
}
1388+
// Return result.
1389+
return result;
12151390
}
12161391

12171392
} // namespace url_pattern_helpers
@@ -1275,7 +1450,7 @@ generate_regular_expression_and_name_list(
12751450
// For each part of part list:
12761451
for (const url_pattern_part& part : part_list) {
12771452
// If part's type is "fixed-text":
1278-
if (part.type == url_pattern_part_type::FIXED_TEST) {
1453+
if (part.type == url_pattern_part_type::FIXED_TEXT) {
12791454
// If part's modifier is "none"
12801455
if (part.modifier == url_pattern_part_modifier::NONE) {
12811456
// Append the result of running escape a regexp string given part's

0 commit comments

Comments
 (0)