1
- /* auto-generated on 2025-04-28 12:16:36 -0400. Do not edit! */
1
+ /* auto-generated on 2025-06-30 19:51:09 -0400. Do not edit! */
2
2
/* begin file src/ada.cpp */
3
3
#include "ada.h"
4
4
/* begin file src/checkers.cpp */
@@ -134,7 +134,7 @@ ada_really_inline constexpr bool verify_dns_length(
134
134
135
135
ADA_PUSH_DISABLE_ALL_WARNINGS
136
136
/* begin file src/ada_idna.cpp */
137
- /* auto-generated on 2025-03-08 13:17:11 -0500 . Do not edit! */
137
+ /* auto-generated on 2025-06-26 23:04:30 -0300 . Do not edit! */
138
138
/* begin file src/idna.cpp */
139
139
/* begin file src/unicode_transcoding.cpp */
140
140
@@ -8157,7 +8157,7 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) {
8157
8157
++h;
8158
8158
out.push_back(char(c));
8159
8159
}
8160
- if (c > 0x10ffff || (c >= 0xd880 && c < 0xe000)) {
8160
+ if (c > 0x10ffff || (c >= 0xd800 && c < 0xe000)) {
8161
8161
return false;
8162
8162
}
8163
8163
}
@@ -9547,6 +9547,10 @@ bool is_label_valid(const std::u32string_view label) {
9547
9547
#include <ranges>
9548
9548
9549
9549
9550
+ #ifdef ADA_USE_SIMDUTF
9551
+ #include "simdutf.h"
9552
+ #endif
9553
+
9550
9554
namespace ada::idna {
9551
9555
9552
9556
bool constexpr is_ascii(std::u32string_view view) {
@@ -9659,11 +9663,20 @@ std::string to_ascii(std::string_view ut8_string) {
9659
9663
}
9660
9664
static const std::string error = "";
9661
9665
// We convert to UTF-32
9666
+
9667
+ #ifdef ADA_USE_SIMDUTF
9668
+ size_t utf32_length =
9669
+ simdutf::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
9670
+ std::u32string utf32(utf32_length, '\0');
9671
+ size_t actual_utf32_length = simdutf::convert_utf8_to_utf32(
9672
+ ut8_string.data(), ut8_string.size(), utf32.data());
9673
+ #else
9662
9674
size_t utf32_length =
9663
9675
ada::idna::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
9664
9676
std::u32string utf32(utf32_length, '\0');
9665
9677
size_t actual_utf32_length = ada::idna::utf8_to_utf32(
9666
9678
ut8_string.data(), ut8_string.size(), utf32.data());
9679
+ #endif
9667
9680
if (actual_utf32_length == 0) {
9668
9681
return error;
9669
9682
}
@@ -9755,6 +9768,10 @@ std::string to_ascii(std::string_view ut8_string) {
9755
9768
#include <string>
9756
9769
9757
9770
9771
+ #ifdef ADA_USE_SIMDUTF
9772
+ #include "simdutf.h"
9773
+ #endif
9774
+
9758
9775
namespace ada::idna {
9759
9776
std::string to_unicode(std::string_view input) {
9760
9777
std::string output;
@@ -9773,11 +9790,19 @@ std::string to_unicode(std::string_view input) {
9773
9790
if (ada::idna::verify_punycode(label_view)) {
9774
9791
std::u32string tmp_buffer;
9775
9792
if (ada::idna::punycode_to_utf32(label_view, tmp_buffer)) {
9793
+ #ifdef ADA_USE_SIMDUTF
9794
+ auto utf8_size = simdutf::utf8_length_from_utf32(tmp_buffer.data(),
9795
+ tmp_buffer.size());
9796
+ std::string final_utf8(utf8_size, '\0');
9797
+ simdutf::convert_utf32_to_utf8(tmp_buffer.data(), tmp_buffer.size(),
9798
+ final_utf8.data());
9799
+ #else
9776
9800
auto utf8_size = ada::idna::utf8_length_from_utf32(tmp_buffer.data(),
9777
9801
tmp_buffer.size());
9778
9802
std::string final_utf8(utf8_size, '\0');
9779
9803
ada::idna::utf32_to_utf8(tmp_buffer.data(), tmp_buffer.size(),
9780
9804
final_utf8.data());
9805
+ #endif
9781
9806
output.append(final_utf8);
9782
9807
} else {
9783
9808
// ToUnicode never fails. If any step fails, then the original input
@@ -11042,7 +11067,7 @@ bool can_parse(std::string_view input, const std::string_view* base_input) {
11042
11067
return result.is_valid;
11043
11068
}
11044
11069
11045
- ada_warn_unused std::string to_string(ada::encoding_type type) {
11070
+ ada_warn_unused std::string_view to_string(ada::encoding_type type) {
11046
11071
switch (type) {
11047
11072
case ada::encoding_type::UTF8:
11048
11073
return "UTF-8";
@@ -12536,35 +12561,67 @@ bool url::set_host_or_hostname(const std::string_view input) {
12536
12561
// Note: the 'found_colon' value is true if and only if a colon was
12537
12562
// encountered while not inside brackets.
12538
12563
if (found_colon) {
12564
+ // If buffer is the empty string, host-missing validation error, return
12565
+ // failure.
12566
+ std::string_view buffer = host_view.substr(0, location);
12567
+ if (buffer.empty()) {
12568
+ return false;
12569
+ }
12570
+
12571
+ // If state override is given and state override is hostname state, then
12572
+ // return failure.
12539
12573
if constexpr (override_hostname) {
12540
12574
return false;
12541
12575
}
12542
- std::string_view buffer = new_host.substr(location + 1);
12543
- if (!buffer.empty()) {
12544
- set_port(buffer);
12576
+
12577
+ // Let host be the result of host parsing buffer with url is not special.
12578
+ bool succeeded = parse_host(buffer);
12579
+ if (!succeeded) {
12580
+ host = std::move(previous_host);
12581
+ update_base_port(previous_port);
12582
+ return false;
12545
12583
}
12546
- }
12547
- // If url is special and host_view is the empty string, validation error,
12548
- // return failure. Otherwise, if state override is given, host_view is the
12549
- // empty string, and either url includes credentials or url's port is
12550
- // non-null, return.
12551
- else if (host_view.empty() &&
12552
- (is_special() || has_credentials() || port.has_value())) {
12553
- return false;
12554
- }
12555
12584
12556
- // Let host be the result of host parsing host_view with url is not special.
12557
- if (host_view.empty() && !is_special()) {
12558
- host = "";
12585
+ // Set url's host to host, buffer to the empty string, and state to port
12586
+ // state.
12587
+ std::string_view port_buffer = new_host.substr(location + 1);
12588
+ if (!port_buffer.empty()) {
12589
+ set_port(port_buffer);
12590
+ }
12559
12591
return true;
12560
12592
}
12593
+ // Otherwise, if one of the following is true:
12594
+ // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
12595
+ // - url is special and c is U+005C (\)
12596
+ else {
12597
+ // If url is special and host_view is the empty string, host-missing
12598
+ // validation error, return failure.
12599
+ if (host_view.empty() && is_special()) {
12600
+ return false;
12601
+ }
12561
12602
12562
- bool succeeded = parse_host(host_view);
12563
- if (!succeeded) {
12564
- host = std::move(previous_host);
12565
- update_base_port(previous_port);
12603
+ // Otherwise, if state override is given, host_view is the empty string,
12604
+ // and either url includes credentials or url's port is non-null, then
12605
+ // return failure.
12606
+ if (host_view.empty() && (has_credentials() || port.has_value())) {
12607
+ return false;
12608
+ }
12609
+
12610
+ // Let host be the result of host parsing host_view with url is not
12611
+ // special.
12612
+ if (host_view.empty() && !is_special()) {
12613
+ host = "";
12614
+ return true;
12615
+ }
12616
+
12617
+ bool succeeded = parse_host(host_view);
12618
+ if (!succeeded) {
12619
+ host = std::move(previous_host);
12620
+ update_base_port(previous_port);
12621
+ return false;
12622
+ }
12623
+ return true;
12566
12624
}
12567
- return succeeded;
12568
12625
}
12569
12626
12570
12627
size_t location = new_host.find_first_of("/\\?");
@@ -12621,10 +12678,16 @@ bool url::set_port(const std::string_view input) {
12621
12678
if (cannot_have_credentials_or_port()) {
12622
12679
return false;
12623
12680
}
12681
+
12682
+ if (input.empty()) {
12683
+ port = std::nullopt;
12684
+ return true;
12685
+ }
12686
+
12624
12687
std::string trimmed(input);
12625
12688
helpers::remove_ascii_tab_or_newline(trimmed);
12689
+
12626
12690
if (trimmed.empty()) {
12627
- port = std::nullopt;
12628
12691
return true;
12629
12692
}
12630
12693
@@ -12633,9 +12696,15 @@ bool url::set_port(const std::string_view input) {
12633
12696
return false;
12634
12697
}
12635
12698
12699
+ // Find the first non-digit character to determine the length of digits
12700
+ auto first_non_digit =
12701
+ std::ranges::find_if_not(trimmed, ada::unicode::is_ascii_digit);
12702
+ std::string_view digits_to_parse =
12703
+ std::string_view(trimmed.data(), first_non_digit - trimmed.begin());
12704
+
12636
12705
// Revert changes if parse_port fails.
12637
12706
std::optional<uint16_t> previous_port = port;
12638
- parse_port(trimmed );
12707
+ parse_port(digits_to_parse );
12639
12708
if (is_valid) {
12640
12709
return true;
12641
12710
}
@@ -13966,10 +14035,16 @@ bool url_aggregator::set_port(const std::string_view input) {
13966
14035
if (cannot_have_credentials_or_port()) {
13967
14036
return false;
13968
14037
}
14038
+
14039
+ if (input.empty()) {
14040
+ clear_port();
14041
+ return true;
14042
+ }
14043
+
13969
14044
std::string trimmed(input);
13970
14045
helpers::remove_ascii_tab_or_newline(trimmed);
14046
+
13971
14047
if (trimmed.empty()) {
13972
- clear_port();
13973
14048
return true;
13974
14049
}
13975
14050
@@ -13978,9 +14053,15 @@ bool url_aggregator::set_port(const std::string_view input) {
13978
14053
return false;
13979
14054
}
13980
14055
14056
+ // Find the first non-digit character to determine the length of digits
14057
+ auto first_non_digit =
14058
+ std::ranges::find_if_not(trimmed, ada::unicode::is_ascii_digit);
14059
+ std::string_view digits_to_parse =
14060
+ std::string_view(trimmed.data(), first_non_digit - trimmed.begin());
14061
+
13981
14062
// Revert changes if parse_port fails.
13982
14063
uint32_t previous_port = components.port;
13983
- parse_port(trimmed );
14064
+ parse_port(digits_to_parse );
13984
14065
if (is_valid) {
13985
14066
return true;
13986
14067
}
@@ -14223,43 +14304,75 @@ bool url_aggregator::set_host_or_hostname(const std::string_view input) {
14223
14304
// Note: the 'found_colon' value is true if and only if a colon was
14224
14305
// encountered while not inside brackets.
14225
14306
if (found_colon) {
14307
+ // If buffer is the empty string, host-missing validation error, return
14308
+ // failure.
14309
+ std::string_view host_buffer = host_view.substr(0, location);
14310
+ if (host_buffer.empty()) {
14311
+ return false;
14312
+ }
14313
+
14314
+ // If state override is given and state override is hostname state, then
14315
+ // return failure.
14226
14316
if constexpr (override_hostname) {
14227
14317
return false;
14228
14318
}
14229
- std::string_view sub_buffer = new_host.substr(location + 1);
14230
- if (!sub_buffer.empty()) {
14231
- set_port(sub_buffer);
14319
+
14320
+ // Let host be the result of host parsing buffer with url is not special.
14321
+ bool succeeded = parse_host(host_buffer);
14322
+ if (!succeeded) {
14323
+ update_base_hostname(previous_host);
14324
+ update_base_port(previous_port);
14325
+ return false;
14232
14326
}
14327
+
14328
+ // Set url's host to host, buffer to the empty string, and state to port
14329
+ // state.
14330
+ std::string_view port_buffer = new_host.substr(location + 1);
14331
+ if (!port_buffer.empty()) {
14332
+ set_port(port_buffer);
14333
+ }
14334
+ return true;
14233
14335
}
14234
- // If url is special and host_view is the empty string, validation error,
14235
- // return failure. Otherwise, if state override is given, host_view is the
14236
- // empty string, and either url includes credentials or url's port is
14237
- // non-null, return.
14238
- else if (host_view.empty() &&
14239
- (is_special() || has_credentials() || has_port())) {
14240
- return false;
14241
- }
14336
+ // Otherwise, if one of the following is true:
14337
+ // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
14338
+ // - url is special and c is U+005C (\)
14339
+ else {
14340
+ // If url is special and host_view is the empty string, host-missing
14341
+ // validation error, return failure.
14342
+ if (host_view.empty() && is_special()) {
14343
+ return false;
14344
+ }
14345
+
14346
+ // Otherwise, if state override is given, host_view is the empty string,
14347
+ // and either url includes credentials or url's port is non-null, then
14348
+ // return failure.
14349
+ if (host_view.empty() && (has_credentials() || has_port())) {
14350
+ return false;
14351
+ }
14242
14352
14243
- // Let host be the result of host parsing host_view with url is not special.
14244
- if (host_view.empty() && !is_special()) {
14245
- if (has_hostname()) {
14246
- clear_hostname(); // easy!
14353
+ // Let host be the result of host parsing host_view with url is not
14354
+ // special.
14355
+ if (host_view.empty() && !is_special()) {
14356
+ if (has_hostname()) {
14357
+ clear_hostname(); // easy!
14358
+ } else if (has_dash_dot()) {
14359
+ add_authority_slashes_if_needed();
14360
+ delete_dash_dot();
14361
+ }
14362
+ return true;
14363
+ }
14364
+
14365
+ bool succeeded = parse_host(host_view);
14366
+ if (!succeeded) {
14367
+ update_base_hostname(previous_host);
14368
+ update_base_port(previous_port);
14369
+ return false;
14247
14370
} else if (has_dash_dot()) {
14248
- add_authority_slashes_if_needed();
14371
+ // Should remove dash_dot from pathname
14249
14372
delete_dash_dot();
14250
14373
}
14251
14374
return true;
14252
14375
}
14253
-
14254
- bool succeeded = parse_host(host_view);
14255
- if (!succeeded) {
14256
- update_base_hostname(previous_host);
14257
- update_base_port(previous_port);
14258
- } else if (has_dash_dot()) {
14259
- // Should remove dash_dot from pathname
14260
- delete_dash_dot();
14261
- }
14262
- return succeeded;
14263
14376
}
14264
14377
14265
14378
size_t location = new_host.find_first_of("/\\?");
0 commit comments