diff --git a/src/node/internal/buffer.d.ts b/src/node/internal/buffer.d.ts index 593ab7bf416..065ffec6c49 100644 --- a/src/node/internal/buffer.d.ts +++ b/src/node/internal/buffer.d.ts @@ -55,7 +55,7 @@ export function transcode( source: ArrayBufferView, fromEncoding: Encoding, toEncoding: Encoding -): ArrayBuffer; +): Uint8Array; export const ASCII: Encoding; export const LATIN1: Encoding; diff --git a/src/node/internal/internal_buffer.ts b/src/node/internal/internal_buffer.ts index 1da8c033c1b..7d710bf25ce 100644 --- a/src/node/internal/internal_buffer.ts +++ b/src/node/internal/internal_buffer.ts @@ -2688,9 +2688,13 @@ export function transcode( if (normalizedToEncoding === undefined) { throw new ERR_UNKNOWN_ENCODING(toEncoding); } - return Buffer.from( - bufferUtil.transcode(source, normalizedFromEncoding, normalizedToEncoding) + + const u8: Uint8Array = bufferUtil.transcode( + source, + normalizedFromEncoding, + normalizedToEncoding ); + return Buffer.from(u8.buffer, u8.byteOffset, u8.byteLength); } export function resolveObjectURL(_id: string): unknown { diff --git a/src/workerd/api/node/buffer.c++ b/src/workerd/api/node/buffer.c++ index 6bce7f1221f..43e6374fbed 100644 --- a/src/workerd/api/node/buffer.c++ +++ b/src/workerd/api/node/buffer.c++ @@ -756,15 +756,17 @@ bool BufferUtil::isUtf8(kj::Array buffer) { return simdutf::validate_utf8(buffer.asChars().begin(), buffer.size()); } -kj::Array BufferUtil::transcode( - kj::Array source, EncodingValue rawFromEncoding, EncodingValue rawToEncoding) { +jsg::BufferSource BufferUtil::transcode(jsg::Lock& js, + kj::Array source, + EncodingValue rawFromEncoding, + EncodingValue rawToEncoding) { auto fromEncoding = static_cast(rawFromEncoding); auto toEncoding = static_cast(rawToEncoding); JSG_REQUIRE(i18n::canBeTranscoded(fromEncoding) && i18n::canBeTranscoded(toEncoding), Error, "Unable to transcode buffer due to unsupported encoding"); - return i18n::transcode(source, fromEncoding, toEncoding); + return 
i18n::transcode(js, source, fromEncoding, toEncoding); } } // namespace workerd::api::node diff --git a/src/workerd/api/node/buffer.h b/src/workerd/api/node/buffer.h index a89f9d0a0f5..24940777d88 100644 --- a/src/workerd/api/node/buffer.h +++ b/src/workerd/api/node/buffer.h @@ -77,8 +77,10 @@ class BufferUtil final: public jsg::Object { jsg::JsString flush(jsg::Lock& js, kj::Array state); bool isAscii(kj::Array bytes); bool isUtf8(kj::Array bytes); - kj::Array transcode( - kj::Array source, EncodingValue rawFromEncoding, EncodingValue rawToEncoding); + jsg::BufferSource transcode(jsg::Lock& js, + kj::Array source, + EncodingValue rawFromEncoding, + EncodingValue rawToEncoding); JSG_RESOURCE_TYPE(BufferUtil) { JSG_METHOD(byteLength); diff --git a/src/workerd/api/node/i18n.c++ b/src/workerd/api/node/i18n.c++ index 22f2262b954..12cdd87c575 100644 --- a/src/workerd/api/node/i18n.c++ +++ b/src/workerd/api/node/i18n.c++ @@ -23,9 +23,9 @@ namespace i18n { namespace { // An isolate has a 128mb memory limit. 
-const int ISOLATE_LIMIT = 134217728; +constexpr int ISOLATE_LIMIT = 134217728; -const char* getEncodingName(Encoding input) { +constexpr const char* getEncodingName(Encoding input) { switch (input) { case Encoding::ASCII: return "us-ascii"; @@ -40,88 +40,118 @@ const char* getEncodingName(Encoding input) { } } -typedef kj::Maybe> (*TranscodeImpl)( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding); +using TranscodeImpl = kj::Function( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding)>; -kj::Maybe> TranscodeDefault( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { +kj::Maybe TranscodeDefault( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { Converter to(toEncoding); auto substitute = kj::str(kj::repeat('?', to.minCharSize())); to.setSubstituteChars(substitute); Converter from(fromEncoding); size_t limit = source.size() * to.maxCharSize(); + if (limit == 0) { + auto empty = jsg::BackingStore::alloc(js, 0); + return jsg::BufferSource(js, kj::mv(empty)); + } // Workers are limited to 128MB so this isn't actually a realistic concern, but sanity check. 
JSG_REQUIRE(limit <= ISOLATE_LIMIT, Error, "Source buffer is too large to transcode"); - auto out = kj::heapArray(limit); - char* target = out.asChars().begin(); + + auto out = jsg::BackingStore::alloc(js, limit); + auto outPtr = out.asArrayPtr().asChars(); + char* target = outPtr.begin(); const char* source_ = source.asChars().begin(); UErrorCode status{}; ucnv_convertEx(to.conv(), from.conv(), &target, target + limit, &source_, source_ + source.size(), nullptr, nullptr, nullptr, nullptr, true, true, &status); if (U_SUCCESS(status)) { - return out.first(target - out.asChars().begin()).attach(kj::mv(out)); + out.limit(target - outPtr.begin()); + return jsg::BufferSource(js, kj::mv(out)); } return kj::none; } -kj::Maybe> TranscodeLatin1ToUTF16( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { - auto length_in_chars = source.size() * sizeof(UChar); +kj::Maybe TranscodeLatin1ToUTF16( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { + auto length_in_chars = source.size() * sizeof(char16_t); // Workers are limited to 128MB so this isn't actually a realistic concern, but sanity check. JSG_REQUIRE(length_in_chars <= ISOLATE_LIMIT, Error, "Source buffer is too large to transcode"); + if (length_in_chars == 0) { + auto empty = jsg::BackingStore::alloc(js, 0); + return jsg::BufferSource(js, kj::mv(empty)); + } + Converter from(fromEncoding); - auto destbuf = kj::heapArray(length_in_chars); + auto destBuf = jsg::BackingStore::alloc(js, length_in_chars); + auto destPtr = destBuf.asArrayPtr(); auto actual_length = - simdutf::convert_latin1_to_utf16(source.asChars().begin(), source.size(), destbuf.begin()); + simdutf::convert_latin1_to_utf16(source.asChars().begin(), source.size(), destPtr.begin()); // simdutf returns 0 for invalid value. 
if (actual_length == 0) { return kj::none; } - return destbuf.first(actual_length).asBytes().attach(kj::mv(destbuf)); + destBuf.limit(actual_length * sizeof(char16_t)); + return jsg::BufferSource(js, kj::mv(destBuf)); } -kj::Maybe> TranscodeFromUTF16( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { +kj::Maybe TranscodeFromUTF16( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { Converter to(toEncoding); auto substitute = kj::str(kj::repeat('?', to.minCharSize())); to.setSubstituteChars(substitute); auto utf16_input = kj::arrayPtr( - reinterpret_cast(source.begin()), source.size() / sizeof(UChar)); + reinterpret_cast(source.begin()), source.size() / sizeof(char16_t)); const auto limit = utf16_input.size() * to.maxCharSize(); // Workers are limited to 128MB so this isn't actually a realistic concern, but sanity check. JSG_REQUIRE(limit <= ISOLATE_LIMIT, Error, "Buffer is too large to transcode"); - auto destbuf = kj::heapArray(limit); + // ucnv_fromUChars() counts its output in bytes, so the buffer is sized and trimmed in bytes. + if (limit == 0) { + auto empty = jsg::BackingStore::alloc(js, 0); + return jsg::BufferSource(js, kj::mv(empty)); + } + + auto destBuf = jsg::BackingStore::alloc(js, limit); + auto destPtr = destBuf.asArrayPtr(); UErrorCode status{}; - auto len = ucnv_fromUChars(to.conv(), destbuf.asChars().begin(), destbuf.size(), + auto len = ucnv_fromUChars(to.conv(), destPtr.asChars().begin(), destPtr.size(), utf16_input.begin(), utf16_input.size(), &status); if (U_SUCCESS(status)) { - return destbuf.first(len).asBytes().attach(kj::mv(destbuf)); + destBuf.limit(len); + return jsg::BufferSource(js, kj::mv(destBuf)); } return kj::none; } -kj::Maybe> TranscodeUTF16FromUTF8( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { +kj::Maybe TranscodeUTF16FromUTF8( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { size_t expected_utf16_length = 
simdutf::utf16_length_from_utf8(source.asChars().begin(), source.size()); // Workers are limited to 128MB so this isn't actually a realistic concern, but sanity check. JSG_REQUIRE(expected_utf16_length <= ISOLATE_LIMIT, Error, "Expected UTF-16le length is too large to transcode"); - auto destbuf = kj::heapArray(expected_utf16_length); + + auto length_in_chars = expected_utf16_length * sizeof(char16_t); + if (length_in_chars == 0) { + auto empty = jsg::BackingStore::alloc(js, 0); + return jsg::BufferSource(js, kj::mv(empty)); + } + + auto destBuf = jsg::BackingStore::alloc(js, length_in_chars); + auto destPtr = destBuf.asArrayPtr(); size_t actual_length = - simdutf::convert_utf8_to_utf16le(source.asChars().begin(), source.size(), destbuf.begin()); + simdutf::convert_utf8_to_utf16le(source.asChars().begin(), source.size(), destPtr.begin()); JSG_REQUIRE(actual_length == expected_utf16_length, Error, "Expected UTF16 length mismatch"); // simdutf returns 0 for invalid UTF-8 value. @@ -129,11 +159,11 @@ kj::Maybe> TranscodeUTF16FromUTF8( return kj::none; } - return destbuf.asBytes().attach(kj::mv(destbuf)); + return jsg::BufferSource(js, kj::mv(destBuf)); } -kj::Maybe> TranscodeUTF8FromUTF16( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { +kj::Maybe TranscodeUTF8FromUTF16( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { JSG_REQUIRE(source.size() % 2 == 0, Error, "UTF-16le input size should be multiple of 2"); auto utf16_input = kj::arrayPtr(reinterpret_cast(source.begin()), source.size() / 2); @@ -144,10 +174,16 @@ kj::Maybe> TranscodeUTF8FromUTF16( JSG_REQUIRE(expected_utf8_length <= ISOLATE_LIMIT, Error, "Expected UTF-8 length is too large to transcode"); - auto destbuf = kj::heapArray(expected_utf8_length); + if (expected_utf8_length == 0) { + auto empty = jsg::BackingStore::alloc(js, 0); + return jsg::BufferSource(js, kj::mv(empty)); + } + + auto destBuf = jsg::BackingStore::alloc(js, expected_utf8_length); + 
auto destPtr = destBuf.asArrayPtr().asChars(); - size_t actual_length = simdutf::convert_utf16le_to_utf8( - utf16_input.begin(), utf16_input.size(), destbuf.asChars().begin()); + size_t actual_length = + simdutf::convert_utf16le_to_utf8(utf16_input.begin(), utf16_input.size(), destPtr.begin()); JSG_REQUIRE(actual_length == expected_utf8_length, Error, "Expected UTF8 length mismatch"); // simdutf returns 0 for invalid UTF-8 value. @@ -155,7 +191,7 @@ kj::Maybe> TranscodeUTF8FromUTF16( return kj::none; } - return destbuf.asBytes().attach(kj::mv(destbuf)); + return jsg::BufferSource(js, kj::mv(destBuf)); } } // namespace @@ -197,8 +233,8 @@ void Converter::setSubstituteChars(kj::StringPtr sub) { } } -kj::Array transcode( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { +jsg::BufferSource transcode( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding) { TranscodeImpl transcode_function = &TranscodeDefault; switch (fromEncoding) { case Encoding::ASCII: @@ -228,8 +264,8 @@ kj::Array transcode( JSG_FAIL_REQUIRE(Error, "Invalid encoding passed to transcode"); } - return JSG_REQUIRE_NONNULL( - transcode_function(source, fromEncoding, toEncoding), Error, "Unable to transcode buffer"); + return JSG_REQUIRE_NONNULL(transcode_function(js, source, fromEncoding, toEncoding), Error, + "Unable to transcode buffer"); } } // namespace i18n diff --git a/src/workerd/api/node/i18n.h b/src/workerd/api/node/i18n.h index 72094776bf6..7e8b0f3533d 100644 --- a/src/workerd/api/node/i18n.h +++ b/src/workerd/api/node/i18n.h @@ -3,6 +3,8 @@ // https://opensource.org/licenses/Apache-2.0 #pragma once +#include + #include #include @@ -54,8 +56,8 @@ class Converter final { kj::Own conv_; }; -kj::Array transcode( - kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding); +jsg::BufferSource transcode( + jsg::Lock& js, kj::ArrayPtr source, Encoding fromEncoding, Encoding toEncoding); } // namespace i18n diff --git 
a/src/workerd/api/node/tests/buffer-nodejs-test.js b/src/workerd/api/node/tests/buffer-nodejs-test.js index d5e65618823..3dc88c3ae63 100644 --- a/src/workerd/api/node/tests/buffer-nodejs-test.js +++ b/src/workerd/api/node/tests/buffer-nodejs-test.js @@ -5972,7 +5972,11 @@ export const transcodeTest = { for (const test in tests) { const dest = transcode(orig, 'utf8', test); - strictEqual(dest.length, tests[test].length, `utf8->${test} length`); + strictEqual( + dest.length, + tests[test].length, + `utf8->${test} length (${dest.length}, ${tests[test].length})` + ); for (let n = 0; n < tests[test].length; n++) { strictEqual(dest[n], tests[test][n], `utf8->${test} char ${n}`); } diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h index 4f212188236..f319ac6b867 100644 --- a/src/workerd/jsg/buffersource.h +++ b/src/workerd/jsg/buffersource.h @@ -197,6 +197,13 @@ class BackingStore { byteLength -= bytes; } + // Similar to trim except that it explicitly sets the byte length to a value + // equal to or less than the current byte length. + inline void limit(size_t bytes) { + KJ_ASSERT(bytes <= byteLength); + byteLength = bytes; + } + inline BackingStore clone() { return BackingStore(backingStore, byteLength, byteOffset, elementSize, ctor, integerType); }