|
11 | 11 | // to_upper : 13656 bytes |
12 | 12 | // Total : 31911 bytes |
13 | 13 |
|
14 | | -#[inline(always)] |
15 | | -const fn bitset_search< |
16 | | - const N: usize, |
17 | | - const CHUNK_SIZE: usize, |
18 | | - const N1: usize, |
19 | | - const CANONICAL: usize, |
20 | | - const CANONICALIZED: usize, |
21 | | ->( |
22 | | - needle: u32, |
23 | | - chunk_idx_map: &[u8; N], |
24 | | - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], |
25 | | - bitset_canonical: &[u64; CANONICAL], |
26 | | - bitset_canonicalized: &[(u8, u8); CANONICALIZED], |
27 | | -) -> bool { |
28 | | - let bucket_idx = (needle / 64) as usize; |
29 | | - let chunk_map_idx = bucket_idx / CHUNK_SIZE; |
30 | | - let chunk_piece = bucket_idx % CHUNK_SIZE; |
31 | | - // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. |
32 | | - let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { |
33 | | - chunk_idx_map[chunk_map_idx] |
34 | | - } else { |
35 | | - return false; |
36 | | - }; |
37 | | - let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; |
38 | | - // FIXME(const-hack): Revert to `slice::get` when slice indexing becomes possible in const. |
39 | | - let word = if idx < bitset_canonical.len() { |
40 | | - bitset_canonical[idx] |
41 | | - } else { |
42 | | - let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()]; |
43 | | - let mut word = bitset_canonical[real_idx as usize]; |
44 | | - let should_invert = mapping & (1 << 6) != 0; |
45 | | - if should_invert { |
46 | | - word = !word; |
47 | | - } |
48 | | - // Lower 6 bits |
49 | | - let quantity = mapping & ((1 << 6) - 1); |
50 | | - if mapping & (1 << 7) != 0 { |
51 | | - // shift |
52 | | - word >>= quantity as u64; |
53 | | - } else { |
54 | | - word = word.rotate_left(quantity as u32); |
55 | | - } |
56 | | - word |
57 | | - }; |
58 | | - (word & (1 << (needle % 64) as u64)) != 0 |
59 | | -} |
60 | | - |
61 | | -#[repr(transparent)] |
62 | | -struct ShortOffsetRunHeader(u32); |
63 | | - |
64 | | -impl ShortOffsetRunHeader { |
65 | | - const fn new(start_index: usize, prefix_sum: u32) -> Self { |
66 | | - assert!(start_index < (1 << 11)); |
67 | | - assert!(prefix_sum < (1 << 21)); |
68 | | - |
69 | | - Self((start_index as u32) << 21 | prefix_sum) |
70 | | - } |
71 | | - |
72 | | - #[inline] |
73 | | - const fn start_index(&self) -> usize { |
74 | | - (self.0 >> 21) as usize |
75 | | - } |
76 | | - |
77 | | - #[inline] |
78 | | - const fn prefix_sum(&self) -> u32 { |
79 | | - self.0 & ((1 << 21) - 1) |
80 | | - } |
81 | | -} |
82 | | - |
83 | | -/// # Safety |
84 | | -/// |
85 | | -/// - The last element of `short_offset_runs` must be greater than `std::char::MAX`. |
86 | | -/// - The start indices of all elements in `short_offset_runs` must be less than `OFFSETS`. |
87 | | -#[inline(always)] |
88 | | -unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>( |
89 | | - needle: char, |
90 | | - short_offset_runs: &[ShortOffsetRunHeader; SOR], |
91 | | - offsets: &[u8; OFFSETS], |
92 | | -) -> bool { |
93 | | - let needle = needle as u32; |
94 | | - |
95 | | - let last_idx = |
96 | | - match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) { |
97 | | - Ok(idx) => idx + 1, |
98 | | - Err(idx) => idx, |
99 | | - }; |
100 | | - // SAFETY: `last_idx` *cannot* be past the end of the array, as the last |
101 | | - // element is greater than `std::char::MAX` (the largest possible needle) |
102 | | - // as guaranteed by the caller. |
103 | | - // |
104 | | - // So, we cannot have found it (i.e. `Ok(idx) => idx + 1 != length`) and the |
105 | | - // correct location cannot be past it, so `Err(idx) => idx != length` either. |
106 | | - // |
107 | | - // This means that we can avoid bounds checking for the accesses below, too. |
108 | | - // |
109 | | - // We need to use `intrinsics::assume` since the `panic_nounwind` contained |
110 | | - // in `hint::assert_unchecked` may not be optimized out. |
111 | | - unsafe { crate::intrinsics::assume(last_idx < SOR) }; |
112 | | - |
113 | | - let mut offset_idx = short_offset_runs[last_idx].start_index(); |
114 | | - let length = if let Some(next) = short_offset_runs.get(last_idx + 1) { |
115 | | - (*next).start_index() - offset_idx |
116 | | - } else { |
117 | | - offsets.len() - offset_idx |
118 | | - }; |
119 | | - |
120 | | - let prev = |
121 | | - last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0); |
122 | | - |
123 | | - let total = needle - prev; |
124 | | - let mut prefix_sum = 0; |
125 | | - for _ in 0..(length - 1) { |
126 | | - // SAFETY: It is guaranteed that `length <= OFFSETS - offset_idx`, |
127 | | - // so it follows that `length - 1 + offset_idx < OFFSETS`, therefore |
128 | | - // `offset_idx < OFFSETS` is always true in this loop. |
129 | | - // |
130 | | - // We need to use `intrinsics::assume` since the `panic_nounwind` contained |
131 | | - // in `hint::assert_unchecked` may not be optimized out. |
132 | | - unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) }; |
133 | | - let offset = offsets[offset_idx]; |
134 | | - prefix_sum += offset as u32; |
135 | | - if prefix_sum > total { |
136 | | - break; |
137 | | - } |
138 | | - offset_idx += 1; |
139 | | - } |
140 | | - offset_idx % 2 == 1 |
141 | | -} |
142 | | - |
143 | 14 | pub const UNICODE_VERSION: (u8, u8, u8) = (17, 0, 0); |
| 15 | +use super::rt::*; |
144 | 16 |
|
145 | 17 | #[rustfmt::skip] |
146 | 18 | pub mod alphabetic { |
|
0 commit comments