|
15 | 15 | // specific language governing permissions and limitations
|
16 | 16 | // under the License.
|
17 | 17 |
|
18 |
| -use arrow_array::{ArrayAccessor, BooleanArray}; |
| 18 | +use arrow_array::{Array, ArrayAccessor, BooleanArray, StringViewArray}; |
| 19 | +use arrow_buffer::BooleanBuffer; |
19 | 20 | use arrow_schema::ArrowError;
|
20 | 21 | use memchr::memchr2;
|
21 | 22 | use memchr::memmem::Finder;
|
@@ -111,24 +112,130 @@ impl<'a> Predicate<'a> {
|
111 | 112 | Predicate::Eq(v) => BooleanArray::from_unary(array, |haystack| {
|
112 | 113 | (haystack.len() == v.len() && haystack == *v) != negate
|
113 | 114 | }),
|
114 |
| - Predicate::IEqAscii(v) => BooleanArray::from_unary(array, |haystack| { |
115 |
| - haystack.eq_ignore_ascii_case(v) != negate |
116 |
| - }), |
117 |
| - Predicate::Contains(finder) => BooleanArray::from_unary(array, |haystack| { |
118 |
| - finder.find(haystack.as_bytes()).is_some() != negate |
119 |
| - }), |
120 |
| - Predicate::StartsWith(v) => BooleanArray::from_unary(array, |haystack| { |
121 |
| - starts_with(haystack, v, equals_kernel) != negate |
122 |
| - }), |
123 |
| - Predicate::IStartsWithAscii(v) => BooleanArray::from_unary(array, |haystack| { |
124 |
| - starts_with(haystack, v, equals_ignore_ascii_case_kernel) != negate |
125 |
| - }), |
126 |
| - Predicate::EndsWith(v) => BooleanArray::from_unary(array, |haystack| { |
127 |
| - ends_with(haystack, v, equals_kernel) != negate |
128 |
| - }), |
129 |
| - Predicate::IEndsWithAscii(v) => BooleanArray::from_unary(array, |haystack| { |
130 |
| - ends_with(haystack, v, equals_ignore_ascii_case_kernel) != negate |
131 |
| - }), |
| 115 | + Predicate::IEqAscii(v) => { |
| 116 | + if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() { |
| 117 | + let neddle_bytes = v.as_bytes(); |
| 118 | + let null_buffer = string_view_array.logical_nulls(); |
| 119 | + let boolean_buffer = |
| 120 | + BooleanBuffer::collect_bool(string_view_array.len(), |i| { |
| 121 | + unsafe { string_view_array.bytes_unchecked(i) } |
| 122 | + .eq_ignore_ascii_case(neddle_bytes) |
| 123 | + != negate |
| 124 | + }); |
| 125 | + |
| 126 | + BooleanArray::new(boolean_buffer, null_buffer) |
| 127 | + } else { |
| 128 | + BooleanArray::from_unary(array, |haystack| { |
| 129 | + haystack.eq_ignore_ascii_case(v) != negate |
| 130 | + }) |
| 131 | + } |
| 132 | + } |
| 133 | + Predicate::Contains(finder) => { |
| 134 | + if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() { |
| 135 | + let null_buffer = string_view_array.logical_nulls(); |
| 136 | + let boolean_buffer = |
| 137 | + BooleanBuffer::collect_bool(string_view_array.len(), |i| { |
| 138 | + finder |
| 139 | + .find(unsafe { string_view_array.bytes_unchecked(i) }) |
| 140 | + .is_some() |
| 141 | + != negate |
| 142 | + }); |
| 143 | + |
| 144 | + BooleanArray::new(boolean_buffer, null_buffer) |
| 145 | + } else { |
| 146 | + BooleanArray::from_unary(array, |haystack| { |
| 147 | + finder.find(haystack.as_bytes()).is_some() != negate |
| 148 | + }) |
| 149 | + } |
| 150 | + } |
| 151 | + Predicate::StartsWith(v) => { |
| 152 | + if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() { |
| 153 | + let needle_bytes = v.as_bytes(); |
| 154 | + let needle_len = needle_bytes.len(); |
| 155 | + let null_buffer = string_view_array.logical_nulls(); |
| 156 | + let boolean_buffer = |
| 157 | + BooleanBuffer::collect_bool(string_view_array.len(), |i| { |
| 158 | + zip( |
| 159 | + unsafe { string_view_array.prefix_bytes_unchecked(needle_len, i) }, |
| 160 | + needle_bytes, |
| 161 | + ) |
| 162 | + .all(equals_kernel) |
| 163 | + }); |
| 164 | + |
| 165 | + BooleanArray::new(boolean_buffer, null_buffer) |
| 166 | + } else { |
| 167 | + BooleanArray::from_unary(array, |haystack| { |
| 168 | + starts_with(haystack, v, equals_kernel) != negate |
| 169 | + }) |
| 170 | + } |
| 171 | + } |
| 172 | + Predicate::IStartsWithAscii(v) => { |
| 173 | + if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() { |
| 174 | + let needle_bytes = v.as_bytes(); |
| 175 | + let needle_len = needle_bytes.len(); |
| 176 | + let null_buffer = string_view_array.logical_nulls(); |
| 177 | + let boolean_buffer = |
| 178 | + BooleanBuffer::collect_bool(string_view_array.len(), |i| { |
| 179 | + zip( |
| 180 | + unsafe { string_view_array.prefix_bytes_unchecked(needle_len, i) }, |
| 181 | + needle_bytes, |
| 182 | + ) |
| 183 | + .all(equals_ignore_ascii_case_kernel) |
| 184 | + }); |
| 185 | + |
| 186 | + BooleanArray::new(boolean_buffer, null_buffer) |
| 187 | + } else { |
| 188 | + BooleanArray::from_unary(array, |haystack| { |
| 189 | + starts_with(haystack, v, equals_ignore_ascii_case_kernel) != negate |
| 190 | + }) |
| 191 | + } |
| 192 | + } |
| 193 | + Predicate::EndsWith(v) => { |
| 194 | + if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() { |
| 195 | + let needle_bytes = v.as_bytes(); |
| 196 | + let needle_len = needle_bytes.len(); |
| 197 | + let null_buffer = string_view_array.logical_nulls(); |
| 198 | + let boolean_buffer = |
| 199 | + BooleanBuffer::collect_bool(string_view_array.len(), |i| { |
| 200 | + zip( |
| 201 | + unsafe { string_view_array.prefix_bytes_unchecked(needle_len, i) } |
| 202 | + .iter() |
| 203 | + .rev(), |
| 204 | + needle_bytes.iter().rev(), |
| 205 | + ) |
| 206 | + .all(equals_kernel) |
| 207 | + }); |
| 208 | + |
| 209 | + BooleanArray::new(boolean_buffer, null_buffer) |
| 210 | + } else { |
| 211 | + BooleanArray::from_unary(array, |haystack| { |
| 212 | + ends_with(haystack, v, equals_kernel) != negate |
| 213 | + }) |
| 214 | + } |
| 215 | + } |
| 216 | + Predicate::IEndsWithAscii(v) => { |
| 217 | + if let Some(string_view_array) = array.as_any().downcast_ref::<StringViewArray>() { |
| 218 | + let needle_bytes = v.as_bytes(); |
| 219 | + let needle_len = needle_bytes.len(); |
| 220 | + let null_buffer = string_view_array.logical_nulls(); |
| 221 | + let boolean_buffer = |
| 222 | + BooleanBuffer::collect_bool(string_view_array.len(), |i| { |
| 223 | + zip( |
| 224 | + unsafe { string_view_array.prefix_bytes_unchecked(needle_len, i) } |
| 225 | + .iter() |
| 226 | + .rev(), |
| 227 | + needle_bytes.iter().rev(), |
| 228 | + ) |
| 229 | + .all(equals_ignore_ascii_case_kernel) |
| 230 | + }); |
| 231 | + |
| 232 | + BooleanArray::new(boolean_buffer, null_buffer) |
| 233 | + } else { |
| 234 | + BooleanArray::from_unary(array, |haystack| { |
| 235 | + ends_with(haystack, v, equals_ignore_ascii_case_kernel) != negate |
| 236 | + }) |
| 237 | + } |
| 238 | + } |
132 | 239 | Predicate::Regex(v) => {
|
133 | 240 | BooleanArray::from_unary(array, |haystack| v.is_match(haystack) != negate)
|
134 | 241 | }
|
|
0 commit comments