|
2 | 2 | //! systems: just a `Vec<u8>`/`[u8]`.
|
3 | 3 |
|
4 | 4 | use core::clone::CloneToUninit;
|
| 5 | +use core::str::advance_utf8; |
5 | 6 |
|
6 | 7 | use crate::borrow::Cow;
|
7 | 8 | use crate::collections::TryReserveError;
|
@@ -64,25 +65,37 @@ impl fmt::Debug for Slice {
|
64 | 65 |
|
65 | 66 | impl fmt::Display for Slice {
|
66 | 67 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
67 |
| - // If we're the empty string then our iterator won't actually yield |
68 |
| - // anything, so perform the formatting manually |
69 |
| - if self.inner.is_empty() { |
70 |
| - return "".fmt(f); |
| 68 | + // Corresponds to `Formatter::pad`, but for `OsStr` instead of `str`. |
| 69 | + |
| 70 | + // Make sure there's a fast path up front. |
| 71 | + if f.options().get_width().is_none() && f.options().get_precision().is_none() { |
| 72 | + return self.write_lossy(f); |
71 | 73 | }
|
72 | 74 |
|
73 |
| - for chunk in self.inner.utf8_chunks() { |
74 |
| - let valid = chunk.valid(); |
75 |
| - // If we successfully decoded the whole chunk as a valid string then |
76 |
| - // we can return a direct formatting of the string which will also |
77 |
| - // respect various formatting flags if possible. |
78 |
| - if chunk.invalid().is_empty() { |
79 |
| - return valid.fmt(f); |
80 |
| - } |
| 75 | + // The `precision` field can be interpreted as a maximum width for the |
| 76 | + // string being formatted. |
| 77 | + let max_char_count = f.options().get_precision().unwrap_or(usize::MAX); |
| 78 | + let (truncated, char_count) = truncate_chars(&self.inner, max_char_count); |
| 79 | + |
| 80 | + // If our string is longer than the maximum width, truncate it and |
| 81 | + // handle other flags in terms of the truncated string. |
| 82 | + // SAFETY: The truncation splits at Unicode scalar value boundaries. |
| 83 | + let s = unsafe { Slice::from_encoded_bytes_unchecked(truncated) }; |
81 | 84 |
|
82 |
| - f.write_str(valid)?; |
83 |
| - f.write_char(char::REPLACEMENT_CHARACTER)?; |
| 85 | + // The `width` field is more of a minimum width parameter at this point. |
| 86 | + if let Some(width) = f.options().get_width() |
| 87 | + && char_count < width |
| 88 | + { |
| 89 | + // If we're under the minimum width, then fill up the minimum width |
| 90 | + // with the specified string + some alignment. |
| 91 | + let post_padding = f.padding(width - char_count, fmt::Alignment::Left)?; |
| 92 | + s.write_lossy(f)?; |
| 93 | + post_padding.write(f) |
| 94 | + } else { |
| 95 | + // If we're over the minimum width or there is no minimum width, we |
| 96 | + // can just emit the string. |
| 97 | + s.write_lossy(f) |
84 | 98 | }
|
85 |
| - Ok(()) |
86 | 99 | }
|
87 | 100 | }
|
88 | 101 |
|
@@ -297,6 +310,18 @@ impl Slice {
|
297 | 310 | String::from_utf8_lossy(&self.inner)
|
298 | 311 | }
|
299 | 312 |
|
| 313 | + /// Writes the string as lossy UTF-8 like [`String::from_utf8_lossy`]. |
| 314 | + /// It ignores formatter flags. |
| 315 | + fn write_lossy(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 316 | + for chunk in self.inner.utf8_chunks() { |
| 317 | + f.write_str(chunk.valid())?; |
| 318 | + if !chunk.invalid().is_empty() { |
| 319 | + f.write_char(char::REPLACEMENT_CHARACTER)?; |
| 320 | + } |
| 321 | + } |
| 322 | + Ok(()) |
| 323 | + } |
| 324 | + |
300 | 325 | #[inline]
|
301 | 326 | pub fn to_owned(&self) -> Buf {
|
302 | 327 | Buf { inner: self.inner.to_vec() }
|
@@ -371,3 +396,19 @@ unsafe impl CloneToUninit for Slice {
|
371 | 396 | unsafe { self.inner.clone_to_uninit(dst) }
|
372 | 397 | }
|
373 | 398 | }
|
| 399 | + |
| 400 | +/// Counts the number of Unicode scalar values in the byte string, allowing |
| 401 | +/// invalid UTF-8 sequences. For invalid sequences, the maximal prefix of a |
| 402 | +/// valid UTF-8 code unit counts as one. Only up to `max_chars` scalar values |
| 403 | +/// are scanned. Returns the character count and the byte length. |
| 404 | +fn truncate_chars(bytes: &[u8], max_chars: usize) -> (&[u8], usize) { |
| 405 | + let mut iter = bytes.iter(); |
| 406 | + let mut char_count = 0; |
| 407 | + while !iter.is_empty() && char_count < max_chars { |
| 408 | + advance_utf8(&mut iter); |
| 409 | + char_count += 1; |
| 410 | + } |
| 411 | + let byte_len = bytes.len() - iter.len(); |
| 412 | + let truncated = unsafe { bytes.get_unchecked(..byte_len) }; |
| 413 | + (truncated, char_count) |
| 414 | +} |
0 commit comments