Skip to content

Commit

Permalink
Merge pull request #2835 from cloudflare/yagiz/improve-string-wrapper
Browse files Browse the repository at this point in the history
improve stringwrapper bytestring unwrap
  • Loading branch information
anonrig authored Oct 2, 2024
2 parents 3f448b3 + 662b1d8 commit 67cafd3
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/workerd/jsg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ wd_cc_library(
"web-idl.h",
"wrappable.h",
],
implementation_deps = ["@simdutf"],
visibility = ["//visibility:public"],
deps = [
":exception",
Expand All @@ -65,6 +64,7 @@ wd_cc_library(
"//src/workerd/util:thread-scopes",
"//src/workerd/util:uuid",
"@capnp-cpp//src/kj",
"@simdutf",
"@workerd-v8//:v8",
],
)
Expand Down
28 changes: 16 additions & 12 deletions src/workerd/jsg/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// Handling of various basic value types: numbers, booleans, strings, optionals, maybes, variants,
// arrays, buffers, dicts.

#include "simdutf.h"
#include "util.h"
#include "web-idl.h"
#include "wrappable.h"
Expand Down Expand Up @@ -468,20 +469,23 @@ class StringWrapper {
kj::Maybe<v8::Local<v8::Object>> parentObject) {
// TODO(cleanup): Move to a HeaderStringWrapper in the api directory.
v8::Local<v8::String> str = check(handle->ToString(context));
auto result =
ByteString(KJ_ASSERT_NONNULL(tryUnwrap(context, str, (kj::String*)nullptr, parentObject)));
if (str->ContainsOnlyOneByte()) {
auto buf = kj::heapArray<kj::byte>(str->Length() + 1);
str->WriteOneByte(context->GetIsolate(), buf.begin());
for (auto b: buf) {
if (b >= 128) {
result.warning = ByteString::Warning::CONTAINS_EXTENDED_ASCII;
break;
}
auto result = ByteString(KJ_ASSERT_NONNULL(
tryUnwrap(context, str, static_cast<kj::String*>(nullptr), parentObject)));

if (!simdutf::validate_ascii(result.begin(), result.size())) {
// If storage is one-byte or the string contains only one-byte
// characters, we know that it contains extended ASCII characters.
//
// The order of execution matters, since ContainsOnlyOneByte()
// will scan the whole string for two-byte storage.
if (str->ContainsOnlyOneByte()) {
result.warning = ByteString::Warning::CONTAINS_EXTENDED_ASCII;
} else {
// Storage is two-bytes and it contains two-byte characters.
result.warning = ByteString::Warning::CONTAINS_UNICODE;
}
} else {
result.warning = ByteString::Warning::CONTAINS_UNICODE;
}

return kj::mv(result);
}
};
Expand Down

0 comments on commit 67cafd3

Please sign in to comment.