Skip to content

Commit e42dd4c

Browse files
authored
Merge branch 'main' into is_ascii
2 parents e42c8ab + d1600b0 commit e42dd4c

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

library/core/src/str/validations.rs

+30
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
use super::Utf8Error;
44
use crate::mem;
55

6+
#[cfg(kani)]
7+
use crate::kani;
8+
69
/// Returns the initial codepoint accumulator for the first byte.
710
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
811
/// for width 3, and 3 bits for width 4.
@@ -132,6 +135,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
132135
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
133136
let align = v.as_ptr().align_offset(usize_bytes);
134137

138+
#[safety::loop_invariant(index <= len + ascii_block_size)]
135139
while index < len {
136140
let old_offset = index;
137141
macro_rules! err {
@@ -211,6 +215,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
211215
// until we find a word containing a non-ascii byte.
212216
if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
213217
let ptr = v.as_ptr();
218+
#[safety::loop_invariant(index <= blocks_end + ascii_block_size && align.wrapping_sub(index) % usize_bytes == 0)]
214219
while index < blocks_end {
215220
// SAFETY: since `align - index` and `ascii_block_size` are
216221
// multiples of `usize_bytes`, `block = ptr.add(index)` is
@@ -228,6 +233,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
228233
index += ascii_block_size;
229234
}
230235
// step from the point where the wordwise loop stopped
236+
#[safety::loop_invariant(index <= len)]
231237
while index < len && v[index] < 128 {
232238
index += 1;
233239
}
@@ -271,3 +277,27 @@ pub const fn utf8_char_width(b: u8) -> usize {
271277

272278
/// Mask of the value bits of a continuation byte.
273279
const CONT_MASK: u8 = 0b0011_1111;
280+
281+
#[cfg(kani)]
282+
#[unstable(feature = "kani", issue = "none")]
283+
pub mod verify {
284+
use super::*;
285+
286+
#[kani::proof]
287+
pub fn check_run_utf8_validation() {
288+
if kani::any() {
289+
// TODO: ARR_SIZE can be much larger with cbmc argument
290+
// `--arrays-uf-always`
291+
const ARR_SIZE: usize = 1000;
292+
let mut x: [u8; ARR_SIZE] = kani::any();
293+
let mut xs = kani::slice::any_slice_of_array_mut(&mut x);
294+
run_utf8_validation(xs);
295+
} else {
296+
let ptr = kani::any_where::<usize, _>(|val| *val != 0) as *const u8;
297+
kani::assume(ptr.is_aligned());
298+
unsafe{
299+
run_utf8_validation(crate::slice::from_raw_parts(ptr, 0));
300+
}
301+
}
302+
}
303+
}

0 commit comments

Comments
 (0)