|
1 | 1 | #include "ada.h"
|
2 | 2 |
|
3 | 3 | #include <optional>
|
| 4 | +#include <ranges> |
4 | 5 | #include <regex>
|
5 | 6 | #include <string>
|
6 | 7 |
|
@@ -926,7 +927,7 @@ std::vector<Token> tokenize(std::string_view input, token_policy policy) {
|
926 | 927 | bool first_code_point = name_position == name_start;
|
927 | 928 | // Let valid code point be the result of running is a valid name code
|
928 | 929 | // point given tokenizer’s code point and first code point.
|
929 |
| - auto valid_code_point = tokenizer.is_valid_name_code_point( |
| 930 | + auto valid_code_point = is_valid_name_code_point( |
930 | 931 | tokenizer.code_point.at(0), first_code_point);
|
931 | 932 | // If valid code point is false break.
|
932 | 933 | if (!valid_code_point) break;
|
@@ -1154,7 +1155,7 @@ std::string escape_regexp_string(std::string_view input) {
|
1154 | 1155 | for (const auto& c : input) {
|
1155 | 1156 | // TODO: Optimize this even further
|
1156 | 1157 | if (should_escape_regexp_char(c)) {
|
1157 |
| - result.append("\\" + c); |
| 1158 | + result.append(std::string("\\") + c); |
1158 | 1159 | } else {
|
1159 | 1160 | result.push_back(c);
|
1160 | 1161 | }
|
@@ -1208,10 +1209,184 @@ std::vector<url_pattern_part> parse_pattern_string(
|
1208 | 1209 | std::string generate_pattern_string(
|
1209 | 1210 | std::vector<url_pattern_part>& part_list,
|
1210 | 1211 | url_pattern_compile_component_options& options) {
|
1211 |
| - (void)part_list; |
1212 |
| - (void)options; |
1213 |
| - // TODO: Implement this |
1214 |
| - return {}; |
| 1212 | + // Let result be the empty string. |
| 1213 | + std::string result{}; |
| 1214 | + // Let index list be the result of getting the indices for part list. |
| 1215 | + // For each index of index list: |
| 1216 | + for (size_t index : std::views::iota(size_t{0}, part_list.size())) { |
| 1217 | + // Let part be part list[index]. |
| 1218 | + auto part = part_list[index]; |
| 1219 | + // Let previous part be part list[index - 1] if index is greater than 0, |
| 1220 | + // otherwise let it be null. |
| 1221 | + // TODO: Optimization opportunity. Find a way to avoid making a copy here. |
| 1222 | + std::optional<url_pattern_part> previous_part = |
| 1223 | + index == 0 ? std::nullopt : std::optional(part_list.at(index - 1)); |
| 1224 | + // Let next part be part list[index + 1] if index is less than index list’s |
| 1225 | + // size - 1, otherwise let it be null. |
| 1226 | + std::optional<url_pattern_part> next_part = |
| 1227 | + index < part_list.size() - 1 ? std::optional(part_list.at(index + 1)) |
| 1228 | + : std::nullopt; |
| 1229 | + // If part’s type is "fixed-text" then: |
| 1230 | + if (part.type == url_pattern_part_type::FIXED_TEXT) { |
| 1231 | + // If part’s modifier is "none" then: |
| 1232 | + if (part.modifier == url_pattern_part_modifier::NONE) { |
| 1233 | + // Append the result of running escape a pattern string given part’s |
| 1234 | + // value to the end of result. |
| 1235 | + result.append(escape_pattern(part.value)); |
| 1236 | + continue; |
| 1237 | + } |
| 1238 | + // Append "{" to the end of result. |
| 1239 | + result += "{"; |
| 1240 | + // Append the result of running escape a pattern string given part’s value |
| 1241 | + // to the end of result. |
| 1242 | + result.append(escape_pattern(part.value)); |
| 1243 | + // Append "}" to the end of result. |
| 1244 | + result += "}"; |
| 1245 | + // Append the result of running convert a modifier to a string given |
| 1246 | + // part’s modifier to the end of result. |
| 1247 | + result.append(convert_modifier_to_string(part.modifier)); |
| 1248 | + continue; |
| 1249 | + } |
| 1250 | + // Let custom name be true if part’s name[0] is not an ASCII digit; |
| 1251 | + // otherwise false. |
| 1252 | + // TODO: Optimization opportunity: Find a way to directly check |
| 1253 | + // is_ascii_digit. |
| 1254 | + bool custom_name = idna::is_ascii(std::string_view(part.name.data(), 1)); |
| 1255 | + // Let needs grouping be true if at least one of the following are true, |
| 1256 | + // otherwise let it be false: |
| 1257 | + // - part’s suffix is not the empty string. |
| 1258 | + // - part’s prefix is not the empty string and is not options’s prefix code |
| 1259 | + // point. |
| 1260 | + // TODO: part.prefix is a string, but options.prefix is a char. Which one is |
| 1261 | + // true? |
| 1262 | + bool needs_grouping = |
| 1263 | + !part.suffix.empty() || |
| 1264 | + (!part.prefix.empty() && part.prefix[0] != options.prefix); |
| 1265 | + |
| 1266 | + // If all of the following are true: |
| 1267 | + // - needs grouping is false; and |
| 1268 | + // - custom name is true; and |
| 1269 | + // - part’s type is "segment-wildcard"; and |
| 1270 | + // - part’s modifier is "none"; and |
| 1271 | + // - next part is not null; and |
| 1272 | + // - next part’s prefix is the empty string; and |
| 1273 | + // - next part’s suffix is the empty string |
| 1274 | + if (!needs_grouping && custom_name && |
| 1275 | + part.type == url_pattern_part_type::SEGMENT_WILDCARD && |
| 1276 | + part.modifier == url_pattern_part_modifier::NONE && |
| 1277 | + next_part.has_value() && next_part->prefix.empty() && |
| 1278 | + next_part->suffix.empty()) { |
| 1279 | + // If next part’s type is "fixed-text": |
| 1280 | + if (next_part->type == url_pattern_part_type::FIXED_TEXT) { |
| 1281 | + // Set needs grouping to true if the result of running is a valid name |
| 1282 | + // code point given next part’s value's first code point and the boolean |
| 1283 | + // false is true. |
| 1284 | + // TODO: Implement this. |
| 1285 | + } else { |
| 1286 | + // Set needs grouping to true if next part’s name[0] is an ASCII digit. |
| 1287 | + needs_grouping = |
| 1288 | + idna::is_ascii(std::string_view(next_part->name.data(), 1)); |
| 1289 | + } |
| 1290 | + } |
| 1291 | + |
| 1292 | + // If all of the following are true: |
| 1293 | + // - needs grouping is false; and |
| 1294 | + // - part’s prefix is the empty string; and |
| 1295 | + // - previous part is not null; and |
| 1296 | + // - previous part’s type is "fixed-text"; and |
| 1297 | + // - previous part’s value's last code point is options’s prefix code point. |
| 1298 | + // then set needs grouping to true. |
| 1299 | + if (!needs_grouping && part.prefix.empty() && previous_part.has_value() && |
| 1300 | + previous_part->type == url_pattern_part_type::FIXED_TEXT && |
| 1301 | + previous_part->value.at(previous_part->value.size() - 1) == |
| 1302 | + options.prefix.value()) { |
| 1303 | + needs_grouping = true; |
| 1304 | + } |
| 1305 | + |
| 1306 | + // Assert: part’s name is not the empty string or null. |
| 1307 | + ADA_ASSERT_TRUE(!part.name.empty()); |
| 1308 | + |
| 1309 | + // If needs grouping is true, then append "{" to the end of result. |
| 1310 | + if (needs_grouping) { |
| 1311 | + result.append("{"); |
| 1312 | + } |
| 1313 | + |
| 1314 | + // Append the result of running escape a pattern string given part’s prefix |
| 1315 | + // to the end of result. |
| 1316 | + result.append(escape_pattern(part.prefix)); |
| 1317 | + |
| 1318 | + // If custom name is true: |
| 1319 | + if (custom_name) { |
| 1320 | + // Append ":" to the end of result. |
| 1321 | + result.append(":"); |
| 1322 | + // Append part’s name to the end of result. |
| 1323 | + result.append(part.name); |
| 1324 | + } |
| 1325 | + |
| 1326 | + // If part’s type is "regexp" then: |
| 1327 | + if (part.type == url_pattern_part_type::REGEXP) { |
| 1328 | + // Append "(" to the end of result. |
| 1329 | + result.append("("); |
| 1330 | + // Append part’s value to the end of result. |
| 1331 | + result.append(part.value); |
| 1332 | + // Append ")" to the end of result. |
| 1333 | + result.append(")"); |
| 1334 | + } else if (part.type == url_pattern_part_type::SEGMENT_WILDCARD) { |
| 1335 | + // Otherwise if part’s type is "segment-wildcard" and custom name is |
| 1336 | + // false: Append "(" to the end of result. |
| 1337 | + result.append("("); |
| 1338 | + // Append the result of running generate a segment wildcard regexp given |
| 1339 | + // options to the end of result. |
| 1340 | + result.append(generate_segment_wildcard_regexp(options)); |
| 1341 | + // Append ")" to the end of result. |
| 1342 | + result.append(")"); |
| 1343 | + } else if (part.type == url_pattern_part_type::FULL_WILDCARD) { |
| 1344 | + // Otherwise if part’s type is "full-wildcard": |
| 1345 | + // If custom name is false and one of the following is true: |
| 1346 | + // - previous part is null; or |
| 1347 | + // - previous part’s type is "fixed-text"; or |
| 1348 | + // - previous part’s modifier is not "none"; or |
| 1349 | + // - needs grouping is true; or |
| 1350 | + // - part’s prefix is not the empty string |
| 1351 | + // - then append "*" to the end of result. |
| 1352 | + if (!custom_name && |
| 1353 | + (!previous_part.has_value() || |
| 1354 | + previous_part->type == url_pattern_part_type::FIXED_TEXT || |
| 1355 | + previous_part->modifier != url_pattern_part_modifier::NONE || |
| 1356 | + needs_grouping || !part.prefix.empty())) { |
| 1357 | + result.append("*"); |
| 1358 | + } else { |
| 1359 | + // Append "(" to the end of result. |
| 1360 | + // Append full wildcard regexp value to the end of result. |
| 1361 | + // Append ")" to the end of result. |
| 1362 | + result.append("(.*)"); |
| 1363 | + } |
| 1364 | + } |
| 1365 | + |
| 1366 | + // If all of the following are true: |
| 1367 | + // - part’s type is "segment-wildcard"; and |
| 1368 | + // - custom name is true; and |
| 1369 | + // - part’s suffix is not the empty string; and |
| 1370 | + // - The result of running is a valid name code point given part’s suffix's |
| 1371 | + // first code point and the boolean false is true then append U+005C (\) to |
| 1372 | + // the end of result. |
| 1373 | + if (part.type == url_pattern_part_type::SEGMENT_WILDCARD && custom_name && |
| 1374 | + !part.suffix.empty() && |
| 1375 | + is_valid_name_code_point(part.suffix[0], true)) { |
| 1376 | + result.append("\\"); |
| 1377 | + } |
| 1378 | + |
| 1379 | + // Append the result of running escape a pattern string given part’s suffix |
| 1380 | + // to the end of result. |
| 1381 | + result.append(escape_pattern(part.suffix)); |
| 1382 | + // If needs grouping is true, then append "}" to the end of result. |
| 1383 | + if (needs_grouping) result.append("}"); |
| 1384 | + // Append the result of running convert a modifier to a string given part’s |
| 1385 | + // modifier to the end of result. |
| 1386 | + result.append(convert_modifier_to_string(part.modifier)); |
| 1387 | + } |
| 1388 | + // Return result. |
| 1389 | + return result; |
1215 | 1390 | }
|
1216 | 1391 |
|
1217 | 1392 | } // namespace url_pattern_helpers
|
@@ -1275,7 +1450,7 @@ generate_regular_expression_and_name_list(
|
1275 | 1450 | // For each part of part list:
|
1276 | 1451 | for (const url_pattern_part& part : part_list) {
|
1277 | 1452 | // If part's type is "fixed-text":
|
1278 |
| - if (part.type == url_pattern_part_type::FIXED_TEST) { |
| 1453 | + if (part.type == url_pattern_part_type::FIXED_TEXT) { |
1279 | 1454 | // If part's modifier is "none"
|
1280 | 1455 | if (part.modifier == url_pattern_part_modifier::NONE) {
|
1281 | 1456 | // Append the result of running escape a regexp string given part's
|
|
0 commit comments