Skip to content

Commit e121dcf

Browse files
authored
Rollup merge of #137154 - thaliaarchi:wtf8-fast-paths, r=ChrisDenton
Add UTF-8 validation fast paths in `Wtf8Buf`

This adds two more fast paths for UTF-8 validation in `Wtf8Buf`, making use of the `is_known_utf8` flag added in #96869 (Optimize `Wtf8Buf::into_string` for the case where it contains UTF-8).

r? `@ChrisDenton`
2 parents 1cdd386 + eb14652 commit e121dcf

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed

library/std/src/sys/os_str/wtf8.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -41,13 +41,13 @@ impl AsInner<Wtf8> for Buf {
4141

4242
impl fmt::Debug for Buf {
4343
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
44-
fmt::Debug::fmt(self.as_slice(), f)
44+
fmt::Debug::fmt(&self.inner, f)
4545
}
4646
}
4747

4848
impl fmt::Display for Buf {
4949
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50-
fmt::Display::fmt(self.as_slice(), f)
50+
fmt::Display::fmt(&self.inner, f)
5151
}
5252
}
5353

library/std/src/sys_common/wtf8.rs

+25-1
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,18 @@ impl fmt::Debug for Wtf8Buf {
169169
}
170170
}
171171

172+
/// Formats the string with unpaired surrogates substituted with the replacement
173+
/// character, U+FFFD.
174+
impl fmt::Display for Wtf8Buf {
175+
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
176+
if let Some(s) = self.as_known_utf8() {
177+
fmt::Display::fmt(s, formatter)
178+
} else {
179+
fmt::Display::fmt(&**self, formatter)
180+
}
181+
}
182+
}
183+
172184
impl Wtf8Buf {
173185
/// Creates a new, empty WTF-8 string.
174186
#[inline]
@@ -262,6 +274,18 @@ impl Wtf8Buf {
262274
unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) }
263275
}
264276

277+
/// Converts the string to UTF-8 without validation, if it was created from
278+
/// valid UTF-8.
279+
#[inline]
280+
fn as_known_utf8(&self) -> Option<&str> {
281+
if self.is_known_utf8 {
282+
// SAFETY: The buffer is known to be valid UTF-8.
283+
Some(unsafe { str::from_utf8_unchecked(self.as_bytes()) })
284+
} else {
285+
None
286+
}
287+
}
288+
265289
/// Reserves capacity for at least `additional` more bytes to be inserted
266290
/// in the given `Wtf8Buf`.
267291
/// The collection may reserve more space to avoid frequent reallocations.
@@ -364,7 +388,7 @@ impl Wtf8Buf {
364388
_ => {
365389
// If we'll be pushing a string containing a surrogate, we may
366390
// no longer have UTF-8.
367-
if other.next_surrogate(0).is_some() {
391+
if self.is_known_utf8 && other.next_surrogate(0).is_some() {
368392
self.is_known_utf8 = false;
369393
}
370394

0 commit comments

Comments
 (0)