@@ -82,115 +82,6 @@ function caml_subarray_to_jsbytes(a, i, len) {
82
82
return s ;
83
83
}
84
84
85
//Provides: caml_utf8_of_utf16
function caml_utf8_of_utf16(s) {
  // Encodes the UTF-16 string `s` as UTF-8 and returns the result as a
  // "byte string": a JavaScript string whose char codes are the UTF-8 bytes.
  // Unpaired surrogates are replaced by U+FFFD (bytes EF BF BD).
  //
  // `b` accumulates finished output; `t` is a working chunk that is moved
  // into `b` once it grows past 1024 chars.
  for (var b = "", t = b, c, d, i = 0, l = s.length; i < l; i++) {
    c = s.charCodeAt(i);
    if (c < 0x80) {
      // Copy a whole run of ASCII with one slice. When the scan stops before
      // the end of `s`, `c` holds the first non-ASCII code unit (index j).
      for (var j = i + 1; j < l && (c = s.charCodeAt(j)) < 0x80; j++);
      if (j - i > 512) {
        // NOTE(review): the result of substr is discarded; presumably a hint
        // for the engine to flatten the concatenated string - confirm before
        // removing.
        t.substr(0, 1);
        b += t;
        t = "";
        b += s.slice(i, j);
      } else t += s.slice(i, j);
      if (j === l) break;
      i = j;
    }
    if (c < 0x800) {
      // Two-byte sequence.
      t += String.fromCharCode(0xc0 | (c >> 6));
      t += String.fromCharCode(0x80 | (c & 0x3f));
    } else if (c < 0xd800 || c > 0xdfff) {
      // Three-byte sequence for any BMP code unit outside the surrogate
      // range 0xD800..0xDFFF (both bounds are surrogates).
      t += String.fromCharCode(
        0xe0 | (c >> 12),
        0x80 | ((c >> 6) & 0x3f),
        0x80 | (c & 0x3f)
      );
    } else if (
      c > 0xdbff ||
      i + 1 === l ||
      (d = s.charCodeAt(i + 1)) < 0xdc00 ||
      d > 0xdfff
    ) {
      // Unmatched surrogate: a low surrogate with no preceding high one, or a
      // high surrogate (0xD800..0xDBFF) not followed by a low one.
      // Replaced by \ufffd (replacement character).
      t += "\xef\xbf\xbd";
    } else {
      // Valid surrogate pair: combine into a supplementary code point and
      // emit a four-byte sequence.
      i++;
      c = (c << 10) + d - 0x35fdc00;
      t += String.fromCharCode(
        0xf0 | (c >> 18),
        0x80 | ((c >> 12) & 0x3f),
        0x80 | ((c >> 6) & 0x3f),
        0x80 | (c & 0x3f)
      );
    }
    if (t.length > 1024) {
      t.substr(0, 1);
      b += t;
      t = "";
    }
  }
  return b + t;
}
135
-
136
//Provides: caml_utf16_of_utf8
function caml_utf16_of_utf8(s) {
  // Decodes the byte string `s` (one UTF-8 byte per char code) into a UTF-16
  // JavaScript string. Each invalid byte is replaced by U+FFFD and decoding
  // resumes at the following byte.
  //
  // `v` does double duty: values 1..3 mean "invalid sequence, `i` was advanced
  // by that many bytes"; anything >= 4 is the decoded code point.
  for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) {
    c1 = s.charCodeAt(i);
    if (c1 < 0x80) {
      // Copy a whole run of ASCII with one slice. When the scan stops before
      // the end of `s`, `c1` holds the first non-ASCII byte (index j).
      for (var j = i + 1; j < l && (c1 = s.charCodeAt(j)) < 0x80; j++);
      if (j - i > 512) {
        // NOTE(review): the result of substr is discarded; presumably a hint
        // for the engine to flatten the concatenated string - confirm before
        // removing.
        t.substr(0, 1);
        b += t;
        t = "";
        b += s.slice(i, j);
      } else t += s.slice(i, j);
      if (j === l) break;
      i = j;
    }
    v = 1;
    // `(x & -64) === 128` tests for a continuation byte (10xxxxxx).
    if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
      c = c2 + (c1 << 6);
      if (c1 < 0xe0) {
        // Two-byte sequence; stray continuation bytes and overlong forms
        // yield v < 0x80 and are rejected.
        v = c - 0x3080;
        if (v < 0x80) v = 1;
      } else {
        v = 2;
        if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
          c = c2 + (c << 6);
          if (c1 < 0xf0) {
            // Three-byte sequence; reject overlong forms and encoded
            // surrogates (0xD800..0xDFFF). U+D7FF itself is valid.
            v = c - 0xe2080;
            if (v < 0x800 || (v >= 0xd800 && v < 0xe000)) v = 2;
          } else {
            v = 3;
            if (
              ++i < l &&
              ((c2 = s.charCodeAt(i)) & -64) === 128 &&
              c1 < 0xf5
            ) {
              // Four-byte sequence; reject overlong forms and values above
              // U+10FFFF.
              v = c2 - 0x3c82080 + (c << 6);
              if (v < 0x10000 || v > 0x10ffff) v = 3;
            }
          }
        }
      }
    }
    if (v < 4) {
      // Invalid sequence: rewind to the offending lead byte so the loop's
      // i++ resumes right after it.
      i -= v;
      t += "\ufffd";
    } else if (v > 0xffff)
      // Supplementary code point: emit a surrogate pair.
      t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3ff));
    else t += String.fromCharCode(v);
    if (t.length > 1024) {
      t.substr(0, 1);
      b += t;
      t = "";
    }
  }
  return b + t;
}
193
-
194
85
//Provides: jsoo_is_ascii
195
86
function jsoo_is_ascii ( s ) {
196
87
// The regular expression gets better at around this point for all browsers
@@ -429,17 +320,23 @@ function caml_bytes_set(s, i, c) {
429
320
return caml_bytes_unsafe_set ( s , i , c ) ;
430
321
}
431
322
323
// Shared UTF-8 encoder instance, created once at load time.
//Provides: jsoo_text_encoder
var jsoo_text_encoder = new TextEncoder();

// Shared UTF-8 decoder instance, created once at load time.
// `ignoreBOM: true` keeps a leading EF BB BF in the output as U+FEFF; the
// default decoder would silently drop it, so a decode/encode round trip would
// lose three bytes. Malformed input still decodes to U+FFFD (non-fatal mode).
//Provides: jsoo_text_decoder
var jsoo_text_decoder = new TextDecoder("utf-8", { ignoreBOM: true });
328
+
432
329
//Provides: caml_bytes_of_utf16_jsstring
//Requires: MlBytes, jsoo_text_encoder
function caml_bytes_of_utf16_jsstring(s) {
  // Encode the JavaScript string with the shared encoder and wrap the
  // resulting byte array in an MlBytes.
  // NOTE(review): tag 4 appears to mark array-backed contents (see the
  // `this.t === 4` check in MlBytes.prototype.slice) - confirm.
  var utf8 = jsoo_text_encoder.encode(s);
  var byteCount = utf8.length;
  return new MlBytes(4, utf8, byteCount);
}
440
335
441
336
//Provides: MlBytes
442
- //Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
337
+ //Requires: caml_convert_string_to_bytes, jsoo_is_ascii
338
+ //Requires: caml_uint8_array_of_bytes
339
+ //Requires: jsoo_text_decoder
443
340
function MlBytes ( tag , contents , length ) {
444
341
this . t = tag ;
445
342
this . c = contents ;
@@ -462,9 +359,9 @@ MlBytes.prototype.toString = function () {
462
359
}
463
360
} ;
464
361
MlBytes.prototype.toUtf16 = function () {
  // Returns the contents as a JavaScript (UTF-16) string.
  // Tag 9 is BYTES | ASCII: the stored string is returned unchanged.
  // Every other representation is turned into a byte array and decoded with
  // the shared decoder.
  if (this.t !== 9) {
    var bytes = caml_uint8_array_of_bytes(this);
    return jsoo_text_decoder.decode(bytes);
  }
  return this.c;
};
469
366
MlBytes . prototype . slice = function ( ) {
470
367
var content = this . t === 4 ? this . c . slice ( ) : this . c ;
@@ -778,20 +675,33 @@ function caml_jsbytes_of_string(x) {
778
675
return x ;
779
676
}
780
677
678
// Scratch storage reused by caml_jsstring_of_string for inputs of at most
// 1024 bytes, so short conversions do not allocate a fresh buffer.
//Provides: jsoo_text_decoder_buff
var jsoo_text_decoder_buff = new ArrayBuffer(1024);

//Provides: caml_jsstring_of_string const
//Requires: jsoo_is_ascii
//Requires: jsoo_text_decoder
//Requires: jsoo_text_decoder_buff
//If: js-string
function caml_jsstring_of_string(s) {
  // Converts the byte string `s` (one byte per char code) to a JavaScript
  // string by decoding its bytes with the shared decoder.
  // ASCII-only input is returned as is.
  if (jsoo_is_ascii(s)) return s;
  var len = s.length;
  // An ArrayBuffer exposes its size as `byteLength` (it has no `length`).
  // Inputs that fit get a view over the shared scratch buffer; larger ones
  // get their own array.
  var a =
    len <= jsoo_text_decoder_buff.byteLength
      ? new Uint8Array(jsoo_text_decoder_buff, 0, len)
      : new Uint8Array(len);
  for (var i = 0; i < len; i++) {
    a[i] = s.charCodeAt(i);
  }
  return jsoo_text_decoder.decode(a);
}
788
697
789
698
//Provides: caml_string_of_jsstring const
//Requires: caml_string_of_array
//Requires: jsoo_text_encoder
//If: js-string
function caml_string_of_jsstring(s) {
  // Encode the JavaScript string with the shared encoder, then hand the
  // resulting byte array to caml_string_of_array.
  return caml_string_of_array(jsoo_text_encoder.encode(s));
}
796
706
797
707
//Provides: caml_bytes_of_jsbytes const
@@ -911,7 +821,6 @@ function caml_ml_bytes_content(s) {
911
821
}
912
822
913
823
//Provides: caml_is_ml_string
914
- //Requires: jsoo_is_ascii
915
824
//If: js-string
916
825
function caml_is_ml_string ( s ) {
917
826
// biome-ignore lint/suspicious/noControlCharactersInRegex: expected
0 commit comments