Skip to content

Commit 5499d77

Browse files
committed
Runtime: rely on TextEncoder and TextDecoder
1 parent 59f4693 commit 5499d77

File tree

1 file changed

+33
-124
lines changed

1 file changed

+33
-124
lines changed

runtime/mlBytes.js

+33-124
Original file line numberDiff line numberDiff line change
@@ -82,115 +82,6 @@ function caml_subarray_to_jsbytes(a, i, len) {
8282
return s;
8383
}
8484

85-
//Provides: caml_utf8_of_utf16
86-
/**
 * Re-encodes a UTF-16 JavaScript string as UTF-8, returning a string in
 * which every character code is one UTF-8 byte (0x00-0xff).
 *
 * Unpaired surrogates are replaced by the UTF-8 encoding of U+FFFD.
 *
 * @param {string} s - UTF-16 input string.
 * @returns {string} byte string holding the UTF-8 encoding of `s`.
 */
function caml_utf8_of_utf16(s) {
  // `b` holds the finished output, `t` a small pending chunk appended to `b`
  // once it grows past 1024 characters.
  for (var b = "", t = b, c, d, i = 0, l = s.length; i < l; i++) {
    c = s.charCodeAt(i);
    if (c < 0x80) {
      // Copy a whole run of ASCII characters with a single slice.
      for (var j = i + 1; j < l && (c = s.charCodeAt(j)) < 0x80; j++);
      if (j - i > 512) {
        // Result discarded on purpose; presumably forces the engine to
        // flatten the concatenated string before it is appended.
        t.substr(0, 1);
        b += t;
        t = "";
        b += s.slice(i, j);
      } else t += s.slice(i, j);
      if (j === l) break;
      // `c` now holds the first non-ASCII code unit, found at index `j`.
      i = j;
    }
    if (c < 0x800) {
      // Two-byte sequence.
      t += String.fromCharCode(0xc0 | (c >> 6));
      t += String.fromCharCode(0x80 | (c & 0x3f));
    } else if (c < 0xd800 || c > 0xdfff) {
      // Three-byte sequence for any BMP code unit outside the surrogate
      // range 0xD800-0xDFFF (0xDFFF itself is a low surrogate).
      t += String.fromCharCode(
        0xe0 | (c >> 12),
        0x80 | ((c >> 6) & 0x3f),
        0x80 | (c & 0x3f),
      );
    } else if (
      // High surrogates are 0xD800-0xDBFF inclusive; anything above is a
      // low surrogate that appears without a preceding high surrogate.
      c > 0xdbff ||
      i + 1 === l ||
      (d = s.charCodeAt(i + 1)) < 0xdc00 ||
      d > 0xdfff
    ) {
      // Unmatched surrogate pair, replaced by \ufffd (replacement character)
      t += "\xef\xbf\xbd";
    } else {
      // Valid surrogate pair: consume the low surrogate and combine both
      // halves into the code point (0x35fdc00 = (0xd800 << 10) + 0xdc00 - 0x10000).
      i++;
      c = (c << 10) + d - 0x35fdc00;
      t += String.fromCharCode(
        0xf0 | (c >> 18),
        0x80 | ((c >> 12) & 0x3f),
        0x80 | ((c >> 6) & 0x3f),
        0x80 | (c & 0x3f),
      );
    }
    if (t.length > 1024) {
      t.substr(0, 1);
      b += t;
      t = "";
    }
  }
  return b + t;
}
135-
136-
//Provides: caml_utf16_of_utf8
137-
/**
 * Decodes a byte string (one UTF-8 byte per character code) into a UTF-16
 * JavaScript string.
 *
 * Each byte that cannot start a valid, shortest-form sequence is replaced by
 * U+FFFD, and decoding resumes at the following byte.
 *
 * @param {string} s - byte string holding UTF-8 data.
 * @returns {string} the decoded UTF-16 string.
 */
function caml_utf16_of_utf8(s) {
  // `b` holds the finished output, `t` a small pending chunk appended to `b`
  // once it grows past 1024 characters.
  for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) {
    c1 = s.charCodeAt(i);
    if (c1 < 0x80) {
      // Copy a whole run of ASCII bytes with a single slice.
      for (var j = i + 1; j < l && (c1 = s.charCodeAt(j)) < 0x80; j++);
      if (j - i > 512) {
        // Result discarded on purpose; presumably forces the engine to
        // flatten the concatenated string before it is appended.
        t.substr(0, 1);
        b += t;
        t = "";
        b += s.slice(i, j);
      } else t += s.slice(i, j);
      if (j === l) break;
      // `c1` now holds the first non-ASCII byte, found at index `j`.
      i = j;
    }
    // While v < 4 it is an error marker: the number of bytes to rewind so
    // that only the offending lead byte is consumed. Otherwise it is the
    // decoded code point (always >= 0x80 here).
    v = 1;
    if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
      c = c2 + (c1 << 6);
      if (c1 < 0xe0) {
        // Two-byte sequence; reject overlong encodings and stray
        // continuation bytes used as lead byte.
        v = c - 0x3080;
        if (v < 0x80) v = 1;
      } else {
        v = 2;
        if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
          c = c2 + (c << 6);
          if (c1 < 0xf0) {
            // Three-byte sequence; reject overlong encodings and encoded
            // surrogates (0xD800-0xDFFF). U+D7FF is a valid code point.
            v = c - 0xe2080;
            if (v < 0x800 || (v >= 0xd800 && v < 0xe000)) v = 2;
          } else {
            v = 3;
            if (
              ++i < l &&
              ((c2 = s.charCodeAt(i)) & -64) === 128 &&
              c1 < 0xf5
            ) {
              // Four-byte sequence; must land in U+10000..U+10FFFF.
              v = c2 - 0x3c82080 + (c << 6);
              if (v < 0x10000 || v > 0x10ffff) v = 3;
            }
          }
        }
      }
    }
    if (v < 4) {
      // Invalid sequence
      i -= v;
      t += "\ufffd";
    } else if (v > 0xffff)
      // Split into a surrogate pair (0xd7c0 = 0xd800 - (0x10000 >> 10)).
      t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3ff));
    else t += String.fromCharCode(v);
    if (t.length > 1024) {
      t.substr(0, 1);
      b += t;
      t = "";
    }
  }
  return b + t;
}
193-
19485
//Provides: jsoo_is_ascii
19586
function jsoo_is_ascii(s) {
19687
// The regular expression gets better at around this point for all browsers
@@ -429,17 +320,23 @@ function caml_bytes_set(s, i, c) {
429320
return caml_bytes_unsafe_set(s, i, c);
430321
}
431322

323+
// Shared TextEncoder instance, created once when this runtime file is loaded
// and used by the string conversion primitives that list it in //Requires.
//Provides: jsoo_text_encoder
324+
var jsoo_text_encoder = new TextEncoder();
325+
326+
// Shared TextDecoder instance, constructed with no arguments (so it uses the
// constructor's default encoding and options) and created once at load time.
//Provides: jsoo_text_decoder
327+
var jsoo_text_decoder = new TextDecoder();
328+
432329
//Provides: caml_bytes_of_utf16_jsstring
433-
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, MlBytes
330+
//Requires: MlBytes, jsoo_text_encoder
434331
// Builds an MlBytes value from the JavaScript string `s`.
// Removed lines (marked "-"): picked tag 9 for ASCII input, otherwise tag 8
// after converting `s` with caml_utf8_of_utf16, and stored a JS string.
// Added lines (marked "+"): always encode `s` with jsoo_text_encoder and
// store the resulting array with tag 4 and the array's length.
function caml_bytes_of_utf16_jsstring(s) {
435-
var tag = 9 /* BYTES | ASCII */;
436-
if (!jsoo_is_ascii(s))
437-
(tag = 8) /* BYTES | NOT_ASCII */, (s = caml_utf8_of_utf16(s));
438-
return new MlBytes(tag, s, s.length);
332+
var a = jsoo_text_encoder.encode(s);
333+
return new MlBytes(4, a, a.length);
439334
}
440335

441336
//Provides: MlBytes
442-
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
337+
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii
338+
//Requires: caml_uint8_array_of_bytes
339+
//Requires: jsoo_text_decoder
443340
function MlBytes(tag, contents, length) {
444341
this.t = tag;
445342
this.c = contents;
@@ -462,9 +359,9 @@ MlBytes.prototype.toString = function () {
462359
}
463360
};
464361
// Returns the contents of this MlBytes as a UTF-16 JavaScript string.
// Removed lines (marked "-"): called toString() first, returned it as-is for
// tag 9 and otherwise passed it through caml_utf16_of_utf8.
// Added lines (marked "+"): for tag 9 (annotated "BYTES | ASCII" elsewhere in
// this diff) return this.c directly; otherwise obtain an array through
// caml_uint8_array_of_bytes and decode it with jsoo_text_decoder.
MlBytes.prototype.toUtf16 = function () {
465-
var r = this.toString();
466-
if (this.t === 9) return r;
467-
return caml_utf16_of_utf8(r);
362+
if (this.t === 9) return this.c;
363+
var a = caml_uint8_array_of_bytes(this);
364+
return jsoo_text_decoder.decode(a);
468365
};
469366
MlBytes.prototype.slice = function () {
470367
var content = this.t === 4 ? this.c.slice() : this.c;
@@ -778,20 +675,33 @@ function caml_jsbytes_of_string(x) {
778675
return x;
779676
}
780677

678+
// 1024-byte ArrayBuffer allocated once at load time. caml_jsstring_of_string
// lists it in //Requires and wraps it in a Uint8Array view as scratch space
// for inputs it considers small enough.
//Provides: jsoo_text_decoder_buff
679+
var jsoo_text_decoder_buff = new ArrayBuffer(1024);
680+
781681
//Provides: caml_jsstring_of_string const
782-
//Requires: jsoo_is_ascii, caml_utf16_of_utf8
682+
//Requires: jsoo_is_ascii
683+
//Requires: jsoo_text_decoder
684+
//Requires: jsoo_text_decoder_buff
783685
//If: js-string
784686
/**
 * Converts the string `s`, whose character codes are read as bytes, into a
 * JavaScript string by decoding those bytes with jsoo_text_decoder.
 *
 * ASCII-only input is returned unchanged. Otherwise the bytes are copied
 * into a Uint8Array: a view on the shared jsoo_text_decoder_buff when the
 * input fits, or a fresh array when it does not.
 *
 * @param {string} s - string with one byte per character code.
 * @returns {string} the decoded JavaScript string.
 */
function caml_jsstring_of_string(s) {
  if (jsoo_is_ascii(s)) return s;
  // ArrayBuffer exposes its size as `byteLength`; it has no `length`
  // property, so comparing against `.length` would never pick the shared
  // buffer.
  var a =
    s.length <= jsoo_text_decoder_buff.byteLength
      ? new Uint8Array(jsoo_text_decoder_buff, 0, s.length)
      : new Uint8Array(s.length);
  for (var i = 0; i < s.length; i++) {
    a[i] = s.charCodeAt(i);
  }
  // decode() is synchronous, so the shared buffer can be reused by the next
  // call as soon as this one returns.
  return jsoo_text_decoder.decode(a);
}
788697

789698
//Provides: caml_string_of_jsstring const
790-
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, caml_string_of_jsbytes
699+
//Requires: caml_string_of_array
700+
//Requires: jsoo_text_encoder
791701
//If: js-string
792702
// Converts the JavaScript string `s` into the runtime's string value.
// Removed lines (marked "-"): passed `s` to caml_string_of_jsbytes, directly
// for ASCII input and after caml_utf8_of_utf16 otherwise.
// Added lines (marked "+"): always encode `s` with jsoo_text_encoder and hand
// the resulting array to caml_string_of_array.
function caml_string_of_jsstring(s) {
793-
if (jsoo_is_ascii(s)) return caml_string_of_jsbytes(s);
794-
else return caml_string_of_jsbytes(caml_utf8_of_utf16(s));
703+
var a = jsoo_text_encoder.encode(s);
704+
return caml_string_of_array(a);
795705
}
796706

797707
//Provides: caml_bytes_of_jsbytes const
@@ -911,7 +821,6 @@ function caml_ml_bytes_content(s) {
911821
}
912822

913823
//Provides: caml_is_ml_string
914-
//Requires: jsoo_is_ascii
915824
//If: js-string
916825
function caml_is_ml_string(s) {
917826
// biome-ignore lint/suspicious/noControlCharactersInRegex: expected

0 commit comments

Comments
 (0)