@@ -9,6 +9,15 @@ use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
9
9
use crate :: Error ;
10
10
use crate :: Result ;
11
11
12
+ /// Unicode byte order mark (BOM, U+FEFF) encoded as UTF-8: the three bytes `EF BB BF`
13
+ pub ( crate ) const UTF8_BOM : & [ u8 ] = & [ 0xEF , 0xBB , 0xBF ] ;
14
+ /// Unicode byte order mark (BOM, U+FEFF) encoded as UTF-16 with little-endian byte order: the two bytes `FF FE`
15
+ #[ allow( dead_code) ] // NOTE(review): a visible use (`split_at_bom`) is gated on the `encoding` feature; presumably unused without it -- confirm
16
+ pub ( crate ) const UTF16_LE_BOM : & [ u8 ] = & [ 0xFF , 0xFE ] ;
17
+ /// Unicode byte order mark (BOM, U+FEFF) encoded as UTF-16 with big-endian byte order: the two bytes `FE FF`
18
+ #[ allow( dead_code) ] // NOTE(review): a visible use (`split_at_bom`) is gated on the `encoding` feature; presumably unused without it -- confirm
19
+ pub ( crate ) const UTF16_BE_BOM : & [ u8 ] = & [ 0xFE , 0xFF ] ;
20
+
12
21
/// Decoder of byte slices into strings.
13
22
///
14
23
/// If feature `encoding` is enabled, this encoding is taken from the `"encoding"`
@@ -62,7 +71,7 @@ impl Decoder {
62
71
///
63
72
/// If you instead want to use XML declared encoding, use the `encoding` feature
64
73
pub fn decode_with_bom_removal < ' b > ( & self , bytes : & ' b [ u8 ] ) -> Result < Cow < ' b , str > > {
65
- let bytes = if bytes. starts_with ( & [ 0xEF , 0xBB , 0xBF ] ) {
74
+ let bytes = if bytes. starts_with ( UTF8_BOM ) {
66
75
& bytes[ 3 ..]
67
76
} else {
68
77
bytes
@@ -131,11 +140,11 @@ pub fn decode_with_bom_removal<'b>(bytes: &'b [u8]) -> Result<Cow<'b, str>> {
131
140
132
141
#[ cfg( feature = "encoding" ) ]
133
142
fn split_at_bom < ' b > ( bytes : & ' b [ u8 ] , encoding : & ' static Encoding ) -> ( & ' b [ u8 ] , & ' b [ u8 ] ) {
134
- if encoding == UTF_8 && bytes. starts_with ( & [ 0xEF , 0xBB , 0xBF ] ) {
143
+ if encoding == UTF_8 && bytes. starts_with ( UTF8_BOM ) {
135
144
bytes. split_at ( 3 )
136
- } else if encoding == UTF_16LE && bytes. starts_with ( & [ 0xFF , 0xFE ] ) {
145
+ } else if encoding == UTF_16LE && bytes. starts_with ( UTF16_LE_BOM ) {
137
146
bytes. split_at ( 2 )
138
- } else if encoding == UTF_16BE && bytes. starts_with ( & [ 0xFE , 0xFF ] ) {
147
+ } else if encoding == UTF_16BE && bytes. starts_with ( UTF16_BE_BOM ) {
139
148
bytes. split_at ( 2 )
140
149
} else {
141
150
( & [ ] , bytes)
@@ -172,9 +181,9 @@ fn remove_bom<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> &'b [u8] {
172
181
pub fn detect_encoding ( bytes : & [ u8 ] ) -> Option < & ' static Encoding > {
173
182
match bytes {
174
183
// with BOM
175
- _ if bytes. starts_with ( & [ 0xFE , 0xFF ] ) => Some ( UTF_16BE ) ,
176
- _ if bytes. starts_with ( & [ 0xFF , 0xFE ] ) => Some ( UTF_16LE ) ,
177
- _ if bytes. starts_with ( & [ 0xEF , 0xBB , 0xBF ] ) => Some ( UTF_8 ) ,
184
+ _ if bytes. starts_with ( UTF16_BE_BOM ) => Some ( UTF_16BE ) ,
185
+ _ if bytes. starts_with ( UTF16_LE_BOM ) => Some ( UTF_16LE ) ,
186
+ _ if bytes. starts_with ( UTF8_BOM ) => Some ( UTF_8 ) ,
178
187
179
188
// without BOM
180
189
_ if bytes. starts_with ( & [ 0x00 , b'<' , 0x00 , b'?' ] ) => Some ( UTF_16BE ) , // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2
0 commit comments