3
3
use super :: Utf8Error ;
4
4
use crate :: mem;
5
5
6
+ #[ cfg( kani) ]
7
+ use crate :: kani;
8
+
6
9
/// Returns the initial codepoint accumulator for the first byte.
7
10
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
8
11
/// for width 3, and 3 bits for width 4.
@@ -132,6 +135,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
132
135
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 } ;
133
136
let align = v. as_ptr ( ) . align_offset ( usize_bytes) ;
134
137
138
+ #[ safety:: loop_invariant( index <= len + ascii_block_size) ]
135
139
while index < len {
136
140
let old_offset = index;
137
141
macro_rules! err {
@@ -211,6 +215,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
211
215
// until we find a word containing a non-ascii byte.
212
216
if align != usize:: MAX && align. wrapping_sub ( index) % usize_bytes == 0 {
213
217
let ptr = v. as_ptr ( ) ;
218
+ #[ safety:: loop_invariant( index <= blocks_end + ascii_block_size && align. wrapping_sub( index) % usize_bytes == 0 ) ]
214
219
while index < blocks_end {
215
220
// SAFETY: since `align - index` and `ascii_block_size` are
216
221
// multiples of `usize_bytes`, `block = ptr.add(index)` is
@@ -228,6 +233,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
228
233
index += ascii_block_size;
229
234
}
230
235
// step from the point where the wordwise loop stopped
236
+ #[ safety:: loop_invariant( index <= len) ]
231
237
while index < len && v[ index] < 128 {
232
238
index += 1 ;
233
239
}
@@ -271,3 +277,27 @@ pub const fn utf8_char_width(b: u8) -> usize {
271
277
272
278
/// Mask of the value bits of a continuation byte.
273
279
const CONT_MASK : u8 = 0b0011_1111 ;
280
+
281
+ #[ cfg( kani) ]
282
+ #[ unstable( feature = "kani" , issue = "none" ) ]
283
+ pub mod verify {
284
+ use super :: * ;
285
+
286
+ #[ kani:: proof]
287
+ pub fn check_run_utf8_validation ( ) {
288
+ if kani:: any ( ) {
289
+ // TODO: ARR_SIZE can be much larger with cbmc argument
290
+ // `--arrays-uf-always`
291
+ const ARR_SIZE : usize = 1000 ;
292
+ let mut x: [ u8 ; ARR_SIZE ] = kani:: any ( ) ;
293
+ let mut xs = kani:: slice:: any_slice_of_array_mut ( & mut x) ;
294
+ run_utf8_validation ( xs) ;
295
+ } else {
296
+ let ptr = kani:: any_where :: < usize , _ > ( |val| * val != 0 ) as * const u8 ;
297
+ kani:: assume ( ptr. is_aligned ( ) ) ;
298
+ unsafe {
299
+ run_utf8_validation ( crate :: slice:: from_raw_parts ( ptr, 0 ) ) ;
300
+ }
301
+ }
302
+ }
303
+ }
0 commit comments