Skip to content

Commit b302b6f

Browse files
committed
Add a write_bom() method to the Writer
1 parent e27feab commit b302b6f

File tree

3 files changed

+59
-12
lines changed

3 files changed

+59
-12
lines changed

Changelog.md

+5-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
- [#450]: Added support of asynchronous [tokio](https://tokio.rs/) readers
4141
- [#455]: Change return type of all `read_to_end*` methods to return a span between tags
4242
- [#455]: Added `Reader::read_text` method to return a raw content (including markup) between tags
43-
43+
- [#459]: Added a `Writer::write_bom()` method for inserting a Byte-Order-Mark into the document.
4444

4545
### Bug Fixes
4646

@@ -175,11 +175,13 @@
175175
- [#440]: Removed `Deserializer::from_slice` and `quick_xml::de::from_slice` methods because deserializing from a byte
176176
array cannot guarantee borrowing due to possible copying while decoding.
177177

178-
- [#455]: Removed `Reader::read_text_into` which is only not a better wrapper over match on `Event::Text`
178+
- [#455]: Removed `Reader::read_text_into` which is just a thin wrapper over match on `Event::Text`
179179

180180
- [#456]: Reader and writer stuff grouped under `reader` and `writer` modules.
181181
You still can use re-exported definitions from a crate root
182182

183+
- [#459]: Made the `Writer::write()` method non-public as writing arbitrary bytes to a document is not generally useful or desirable.
184+
183185
### New Tests
184186

185187
- [#9]: Added tests for incorrect nested tags in input
@@ -223,7 +225,7 @@
223225
[#450]: https://github.com/tafia/quick-xml/pull/450
224226
[#455]: https://github.com/tafia/quick-xml/pull/455
225227
[#456]: https://github.com/tafia/quick-xml/pull/456
226-
228+
[#459]: https://github.com/tafia/quick-xml/pull/459
227229

228230
## 0.23.0 -- 2022-05-08
229231

src/encoding.rs

+16-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@ use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
99
use crate::Error;
1010
use crate::Result;
1111

12+
/// Unicode "byte order mark" encoded as UTF-8
13+
pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
14+
/// Unicode "byte order mark" encoded as UTF-16 with little-endian byte order
15+
#[allow(dead_code)]
16+
pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE];
17+
/// Unicode "byte order mark" encoded as UTF-16 with big-endian byte order
18+
#[allow(dead_code)]
19+
pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];
20+
1221
/// Decoder of byte slices into strings.
1322
///
1423
/// If feature `encoding` is enabled, this encoding taken from the `"encoding"`
@@ -62,7 +71,7 @@ impl Decoder {
6271
///
6372
/// If you instead want to use XML declared encoding, use the `encoding` feature
6473
pub fn decode_with_bom_removal<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
65-
let bytes = if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
74+
let bytes = if bytes.starts_with(UTF8_BOM) {
6675
&bytes[3..]
6776
} else {
6877
bytes
@@ -131,11 +140,11 @@ pub fn decode_with_bom_removal<'b>(bytes: &'b [u8]) -> Result<Cow<'b, str>> {
131140

132141
#[cfg(feature = "encoding")]
133142
fn split_at_bom<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> (&'b [u8], &'b [u8]) {
134-
if encoding == UTF_8 && bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
143+
if encoding == UTF_8 && bytes.starts_with(UTF8_BOM) {
135144
bytes.split_at(3)
136-
} else if encoding == UTF_16LE && bytes.starts_with(&[0xFF, 0xFE]) {
145+
} else if encoding == UTF_16LE && bytes.starts_with(UTF16_LE_BOM) {
137146
bytes.split_at(2)
138-
} else if encoding == UTF_16BE && bytes.starts_with(&[0xFE, 0xFF]) {
147+
} else if encoding == UTF_16BE && bytes.starts_with(UTF16_BE_BOM) {
139148
bytes.split_at(2)
140149
} else {
141150
(&[], bytes)
@@ -172,9 +181,9 @@ fn remove_bom<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> &'b [u8] {
172181
pub fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
173182
match bytes {
174183
// with BOM
175-
_ if bytes.starts_with(&[0xFE, 0xFF]) => Some(UTF_16BE),
176-
_ if bytes.starts_with(&[0xFF, 0xFE]) => Some(UTF_16LE),
177-
_ if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) => Some(UTF_8),
184+
_ if bytes.starts_with(UTF16_BE_BOM) => Some(UTF_16BE),
185+
_ if bytes.starts_with(UTF16_LE_BOM) => Some(UTF_16LE),
186+
_ if bytes.starts_with(UTF8_BOM) => Some(UTF_8),
178187

179188
// without BOM
180189
_ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some(UTF_16BE), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2

src/writer.rs

+38-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
//! Contains high-level interface for an events-based XML emitter.
22
3+
use std::io::Write;
4+
5+
use crate::encoding::UTF8_BOM;
36
use crate::errors::{Error, Result};
47
use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
5-
use std::io::Write;
68

79
/// XML writer.
810
///
@@ -86,6 +88,39 @@ impl<W: Write> Writer<W> {
8688
&mut self.writer
8789
}
8890

91+
/// Writes a UTF-8 encoded Byte-Order-Mark (BOM) to the document.
92+
///
93+
/// # Example
94+
///
95+
/// ```rust
96+
/// # use quick_xml::Result;
97+
/// # fn main() -> Result<()> {
98+
/// use quick_xml::events::{BytesStart, BytesText, Event};
99+
/// use quick_xml::writer::Writer;
100+
/// use quick_xml::Error;
101+
/// use std::io::Cursor;
102+
///
103+
/// let mut buffer = Vec::new();
104+
/// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
105+
///
106+
/// writer.write_bom()?;
107+
/// writer
108+
/// .create_element("empty")
109+
/// .with_attribute(("attr1", "value1"))
110+
/// .write_empty()
111+
/// .expect("failure");
112+
///
113+
/// assert_eq!(
114+
/// std::str::from_utf8(&buffer).unwrap(),
115+
/// "\u{FEFF}<empty attr1=\"value1\"/>"
116+
/// );
117+
/// # Ok(())
118+
/// # }
119+
/// ```
120+
pub fn write_bom(&mut self) -> Result<()> {
121+
self.write(UTF8_BOM)
122+
}
123+
89124
/// Writes the given event to the underlying writer.
90125
pub fn write_event<'a, E: AsRef<Event<'a>>>(&mut self, event: E) -> Result<()> {
91126
let mut next_should_line_break = true;
@@ -128,7 +163,7 @@ impl<W: Write> Writer<W> {
128163

129164
/// Writes the given bytes to the underlying writer as-is, mapping any I/O failure to `Error::Io`.
130165
#[inline]
131-
pub fn write(&mut self, value: &[u8]) -> Result<()> {
166+
pub(crate) fn write(&mut self, value: &[u8]) -> Result<()> {
132167
self.writer.write_all(value).map_err(Error::Io)
133168
}
134169

@@ -502,6 +537,7 @@ mod indentation {
502537
</paired>"#
503538
);
504539
}
540+
505541
#[test]
506542
fn element_writer_empty() {
507543
let mut buffer = Vec::new();

0 commit comments

Comments
 (0)