Skip to content

Document unsafe in libcore #66506

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
6 changes: 4 additions & 2 deletions src/libcore/str/lossy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ use crate::str as core_str;
use crate::fmt::{self, Write};
use crate::mem;

// ignore-tidy-undocumented-unsafe

/// Lossy UTF-8 string.
#[unstable(feature = "str_internals", issue = "0")]
pub struct Utf8Lossy {
Expand All @@ -17,6 +15,7 @@ impl Utf8Lossy {
}

/// Reinterprets a byte slice as a `Utf8Lossy` without copying or validating it.
pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
// SAFETY: `&[u8]` and `&Utf8Lossy` use the same memory layout, and `Utf8Lossy`
// does not require its bytes to be valid UTF-8, so any byte slice is acceptable.
unsafe { mem::transmute(bytes) }
}

Expand Down Expand Up @@ -61,6 +60,7 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
while i < self.source.len() {
let i_ = i;

// SAFETY: 0 <= i < self.source.len()
let byte = unsafe { *self.source.get_unchecked(i) };
i += 1;

Expand All @@ -70,6 +70,7 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
let w = core_str::utf8_char_width(byte);

macro_rules! error { () => ({
// SAFETY: we have checked up to i that source is valid utf8
unsafe {
let r = Utf8LossyChunk {
valid: core_str::from_utf8_unchecked(&self.source[0..i_]),
Expand Down Expand Up @@ -130,6 +131,7 @@ impl<'a> Iterator for Utf8LossyChunksIter<'a> {
}

let r = Utf8LossyChunk {
// SAFETY: we have checked that the entire source is valid utf8
valid: unsafe { core_str::from_utf8_unchecked(self.source) },
broken: &[],
};
Expand Down
44 changes: 35 additions & 9 deletions src/libcore/str/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
// ignore-tidy-filelength
// ignore-tidy-undocumented-unsafe

//! String manipulation.
//!
Expand Down Expand Up @@ -337,6 +336,7 @@ impl Utf8Error {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// Validate first; on failure `?` returns the `Utf8Error` to the caller.
run_utf8_validation(v)?;
// SAFETY: validation just succeeded, so `v` is valid UTF-8.
Ok(unsafe { from_utf8_unchecked(v) })
}

Expand Down Expand Up @@ -375,6 +375,7 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
#[stable(feature = "str_mut_extras", since = "1.20.0")]
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// Validate first; on failure `?` returns the `Utf8Error` to the caller.
run_utf8_validation(v)?;
// SAFETY: validation just succeeded, so `v` is valid UTF-8.
Ok(unsafe { from_utf8_unchecked_mut(v) })
}

Expand Down Expand Up @@ -567,7 +568,7 @@ impl<'a> Iterator for Chars<'a> {
#[inline]
fn next(&mut self) -> Option<char> {
next_code_point(&mut self.iter).map(|ch| {
// str invariant says `ch` is a valid Unicode Scalar Value
// SAFETY: str invariant says `ch` is a valid Unicode Scalar Value
unsafe {
char::from_u32_unchecked(ch)
}
Expand Down Expand Up @@ -616,7 +617,7 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
#[inline]
fn next_back(&mut self) -> Option<char> {
next_code_point_reverse(&mut self.iter).map(|ch| {
// str invariant says `ch` is a valid Unicode Scalar Value
// SAFETY: str invariant says `ch` is a valid Unicode Scalar Value
unsafe {
char::from_u32_unchecked(ch)
}
Expand Down Expand Up @@ -648,6 +649,7 @@ impl<'a> Chars<'a> {
#[stable(feature = "iter_to_slice", since = "1.4.0")]
#[inline]
pub fn as_str(&self) -> &'a str {
// SAFETY: `Chars` is only made from a `str`, which guarantees that the bytes
// remaining in `self.iter` are valid UTF-8.
unsafe { from_utf8_unchecked(self.iter.as_slice()) }
}
}
Expand Down Expand Up @@ -1080,6 +1082,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
fn get_end(&mut self) -> Option<&'a str> {
if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
self.finished = true;
// SAFETY: self.start and self.end always lie on unicode boundaries
unsafe {
let string = self.matcher.haystack().get_unchecked(self.start..self.end);
Some(string)
Expand All @@ -1095,6 +1098,7 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {

let haystack = self.matcher.haystack();
match self.matcher.next_match() {
// SAFETY: Searcher guarantees that a and b lie on unicode boundaries
Some((a, b)) => unsafe {
let elt = haystack.get_unchecked(self.start..a);
self.start = b;
Expand All @@ -1120,11 +1124,13 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {

let haystack = self.matcher.haystack();
match self.matcher.next_match_back() {
// SAFETY: Searcher guarantees that a and b lie on unicode boundaries
Some((a, b)) => unsafe {
let elt = haystack.get_unchecked(b..self.end);
self.end = a;
Some(elt)
},
// SAFETY: self.start and self.end always lie on unicode boundaries
None => unsafe {
self.finished = true;
Some(haystack.get_unchecked(self.start..self.end))
Expand Down Expand Up @@ -1253,6 +1259,7 @@ where
impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
#[inline]
fn next(&mut self) -> Option<(usize, &'a str)> {
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match().map(|(start, end)| unsafe {
(start, self.0.haystack().get_unchecked(start..end))
})
Expand All @@ -1262,6 +1269,7 @@ impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
fn next_back(&mut self) -> Option<(usize, &'a str)>
where P::Searcher: ReverseSearcher<'a>
{
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match_back().map(|(start, end)| unsafe {
(start, self.0.haystack().get_unchecked(start..end))
})
Expand Down Expand Up @@ -1307,6 +1315,7 @@ where
impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
#[inline]
fn next(&mut self) -> Option<&'a str> {
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match().map(|(a, b)| unsafe {
// Indices are known to be on utf8 boundaries
self.0.haystack().get_unchecked(a..b)
Expand All @@ -1317,6 +1326,7 @@ impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
fn next_back(&mut self) -> Option<&'a str>
where P::Searcher: ReverseSearcher<'a>
{
// SAFETY: Searcher guarantees that start and end lie on unicode boundaries
self.0.next_match_back().map(|(a, b)| unsafe {
// Indices are known to be on utf8 boundaries
self.0.haystack().get_unchecked(a..b)
Expand Down Expand Up @@ -1538,6 +1548,9 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
if align != usize::max_value() && align.wrapping_sub(index) % usize_bytes == 0 {
let ptr = v.as_ptr();
while index < blocks_end {
// SAFETY: since align - index and ascii_block_size are multiples of
// usize_bytes, ptr.add(index) is always aligned with a usize so we may cast
// directly to a const pointer.
unsafe {
let block = ptr.add(index) as *const usize;
// break if there is a nonascii byte
Expand Down Expand Up @@ -1760,6 +1773,7 @@ mod traits {
if self.start <= self.end &&
slice.is_char_boundary(self.start) &&
slice.is_char_boundary(self.end) {
// SAFETY: just checked that start and end are on a char boundary
Some(unsafe { self.get_unchecked(slice) })
} else {
None
Expand All @@ -1770,6 +1784,7 @@ mod traits {
if self.start <= self.end &&
slice.is_char_boundary(self.start) &&
slice.is_char_boundary(self.end) {
// SAFETY: just checked that start and end are on a char boundary
Some(unsafe { self.get_unchecked_mut(slice) })
} else {
None
Expand Down Expand Up @@ -1799,6 +1814,7 @@ mod traits {
if self.start <= self.end &&
slice.is_char_boundary(self.start) &&
slice.is_char_boundary(self.end) {
// SAFETY: just checked that start and end are on a char boundary
unsafe { self.get_unchecked_mut(slice) }
} else {
super::slice_error_fail(slice, self.start, self.end)
Expand Down Expand Up @@ -1827,6 +1843,7 @@ mod traits {
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if slice.is_char_boundary(self.end) {
// SAFETY: just checked that end is on a char boundary
Some(unsafe { self.get_unchecked(slice) })
} else {
None
Expand All @@ -1835,6 +1852,7 @@ mod traits {
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if slice.is_char_boundary(self.end) {
// SAFETY: just checked that end is on a char boundary
Some(unsafe { self.get_unchecked_mut(slice) })
} else {
None
Expand All @@ -1857,8 +1875,8 @@ mod traits {
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
// is_char_boundary checks that the index is in [0, .len()]
if slice.is_char_boundary(self.end) {
// SAFETY: just checked that end is on a char boundary
unsafe { self.get_unchecked_mut(slice) }
} else {
super::slice_error_fail(slice, 0, self.end)
Expand Down Expand Up @@ -1888,6 +1906,7 @@ mod traits {
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if slice.is_char_boundary(self.start) {
// SAFETY: just checked that start is on a char boundary
Some(unsafe { self.get_unchecked(slice) })
} else {
None
Expand All @@ -1896,6 +1915,7 @@ mod traits {
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if slice.is_char_boundary(self.start) {
// SAFETY: just checked that start is on a char boundary
Some(unsafe { self.get_unchecked_mut(slice) })
} else {
None
Expand All @@ -1920,8 +1940,8 @@ mod traits {
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
// is_char_boundary checks that the index is in [0, .len()]
if slice.is_char_boundary(self.start) {
// SAFETY: just checked that start is on a char boundary
unsafe { self.get_unchecked_mut(slice) }
} else {
super::slice_error_fail(slice, self.start, slice.len())
Expand Down Expand Up @@ -2167,7 +2187,6 @@ impl str {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline(always)]
// SAFETY: const sound because we transmute two types with the same layout
#[allow(unused_attributes)]
#[allow_internal_unstable(const_fn_union)]
pub const fn as_bytes(&self) -> &[u8] {
Expand All @@ -2176,6 +2195,7 @@ impl str {
str: &'a str,
slice: &'a [u8],
}
// SAFETY: const sound because we transmute two types with the same layout
unsafe { Slices { str: self }.slice }
}

Expand Down Expand Up @@ -2501,6 +2521,7 @@ impl str {
pub fn split_at(&self, mid: usize) -> (&str, &str) {
// is_char_boundary checks that the index is in [0, .len()]
if self.is_char_boundary(mid) {
// SAFETY: just checked that mid is on a char boundary
unsafe {
(self.get_unchecked(0..mid),
self.get_unchecked(mid..self.len()))
Expand Down Expand Up @@ -2548,6 +2569,7 @@ impl str {
if self.is_char_boundary(mid) {
let len = self.len();
let ptr = self.as_mut_ptr();
// SAFETY: just checked that mid is on a char boundary
unsafe {
(from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
from_utf8_unchecked_mut(slice::from_raw_parts_mut(
Expand Down Expand Up @@ -3746,8 +3768,8 @@ impl str {
if let Some((_, b)) = matcher.next_reject_back() {
j = b;
}
// SAFETY: Searcher is known to return valid indices
unsafe {
// Searcher is known to return valid indices
self.get_unchecked(i..j)
}
}
Expand Down Expand Up @@ -3785,8 +3807,8 @@ impl str {
if let Some((a, _)) = matcher.next_reject() {
i = a;
}
// SAFETY: Searcher is known to return valid indices
unsafe {
// Searcher is known to return valid indices
self.get_unchecked(i..self.len())
}
}
Expand Down Expand Up @@ -3833,8 +3855,8 @@ impl str {
if let Some((_, b)) = matcher.next_reject_back() {
j = b;
}
// SAFETY: Searcher is known to return valid indices
unsafe {
// Searcher is known to return valid indices
self.get_unchecked(0..j)
}
}
Expand Down Expand Up @@ -4029,6 +4051,7 @@ impl str {
/// ```
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
pub fn make_ascii_uppercase(&mut self) {
// SAFETY: `as_bytes_mut` requires the bytes to still be valid UTF-8 when the
// borrow ends. `make_ascii_uppercase` only rewrites ASCII lowercase bytes to
// their ASCII uppercase counterparts and leaves non-ASCII bytes untouched, so
// UTF-8 validity is preserved.
let me = unsafe { self.as_bytes_mut() };
me.make_ascii_uppercase()
}
Expand All @@ -4054,6 +4077,7 @@ impl str {
/// ```
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
pub fn make_ascii_lowercase(&mut self) {
// SAFETY: `as_bytes_mut` requires the bytes to still be valid UTF-8 when the
// borrow ends. `make_ascii_lowercase` only rewrites ASCII uppercase bytes to
// their ASCII lowercase counterparts and leaves non-ASCII bytes untouched, so
// UTF-8 validity is preserved.
let me = unsafe { self.as_bytes_mut() };
me.make_ascii_lowercase()
}
Expand Down Expand Up @@ -4216,6 +4240,7 @@ impl Default for &str {
#[stable(feature = "default_mut_str", since = "1.28.0")]
impl Default for &mut str {
/// Creates an empty mutable str
// SAFETY: an empty byte slice is trivially valid UTF-8
fn default() -> Self { unsafe { from_utf8_unchecked_mut(&mut []) } }
}

Expand Down Expand Up @@ -4270,6 +4295,7 @@ impl_fn_for_zst! {

#[derive(Clone)]
struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
// SAFETY: not checked here; callers must only pass bytes that are valid UTF-8
unsafe { from_utf8_unchecked(bytes) }
};
}
Expand Down
18 changes: 16 additions & 2 deletions src/libcore/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
//! For more details, see the traits [`Pattern`], [`Searcher`],
//! [`ReverseSearcher`], and [`DoubleEndedSearcher`].

// ignore-tidy-undocumented-unsafe

#![unstable(feature = "pattern",
reason = "API not fully fleshed out and ready to be stabilized",
issue = "27721")]
Expand Down Expand Up @@ -276,6 +274,13 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
#[inline]
fn next(&mut self) -> SearchStep {
let old_finger = self.finger;
// 1. self.finger and self.finger_back are kept on unicode boundaries (this is invariant)
// 2. self.finger >= 0 since it starts at 0 and only increases
// 3. self.finger < self.finger_back because otherwise the char iter would return
// SearchStep::Done
// 4. self.finger comes before the end of the haystack because self.finger_back starts at
// the end and only decreases
// SAFETY: 1-4 guarantee safety of get_unchecked
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
Expand Down Expand Up @@ -303,6 +308,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
return None;
};
// the last byte of the utf8 encoded needle
// SAFETY: we have an invariant that 1 <= utf8_size < 5, so utf8_size - 1 is in bounds
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
if let Some(index) = memchr::memchr(last_byte, bytes) {
// The new finger is the index of the byte we found,
Expand Down Expand Up @@ -346,6 +352,13 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
#[inline]
fn next_back(&mut self) -> SearchStep {
let old_finger = self.finger_back;
// 1. self.finger and self.finger_back are kept on unicode boundaries (this is invariant)
// 2. self.finger >= 0 since it starts at 0 and only increases
// 3. self.finger < self.finger_back because otherwise the char iter would return
// SearchStep::Done
// 4. self.finger comes before the end of the haystack because self.finger_back starts at
// the end and only decreases
// SAFETY: 1-4 guarantee safety of get_unchecked
let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
Expand Down Expand Up @@ -373,6 +386,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
return None;
};
// the last byte of the utf8 encoded needle
// SAFETY: we have an invariant that 1 <= utf8_size < 5, so utf8_size - 1 is in bounds
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
if let Some(index) = memchr::memrchr(last_byte, bytes) {
// we searched a slice that was offset by self.finger,
Expand Down
Loading