From 75a0ed09ee52e42b6a2189a1690dbd91a3c3e327 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 25 May 2025 13:59:44 +0300 Subject: [PATCH 1/6] Implement SSE4.2 version of `search_n` --- stl/src/vector_algorithms.cpp | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 9fa2144482..40f2cfad58 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -3226,6 +3226,59 @@ namespace { _Mid1 = static_cast(_First); _Rewind_bytes(_First, _lzcnt_u32(~_Carry)); + } else if constexpr (sizeof(_Ty) < 8) { + if (_Count <= 8 / sizeof(_Ty) && _Length >= 16 && _Use_sse42()) { + const int _Bytes_count = static_cast(_Count * sizeof(_Ty)); + const int _Sh1 = sizeof(_Ty) != 1 ? 0 : (_Bytes_count < 4 ? _Bytes_count - 2 : 2); + const int _Sh2 = sizeof(_Ty) >= 4 ? 0 + : _Bytes_count < 4 ? 0 + : (_Bytes_count < 8 ? _Bytes_count - 4 : 4); + + const __m128i _Comparand = _Traits::_Set_sse(_Val); + + const void* _Stop_at = _First; + _Advance_bytes(_Stop_at, _Length & ~size_t{0xF}); + + uint32_t _Carry = 0; + do { + const __m128i _Data = _mm_loadu_si128(reinterpret_cast(_First)); + + const __m128i _Cmp = _Traits::_Cmp_sse(_Comparand, _Data); + const uint32_t _Mask = _mm_movemask_epi8(_Cmp); + + uint32_t _MskX = _Carry | (_Mask << 16); + + _MskX = (_MskX >> sizeof(_Ty)) & _MskX; + + if constexpr (sizeof(_Ty) == 1) { + _MskX = (_MskX >> _Sh1) & _MskX; + } + + if constexpr (sizeof(_Ty) < 4) { + _MskX = (_MskX >> _Sh2) & _MskX; + } + + if (_MskX != 0) { + unsigned long _Pos; + // CodeQL [SM02313] _Pos is always initialized: _MskX != 0 was checked right above. + _BitScanForward(&_Pos, _MskX); + _Advance_bytes(_First, static_cast(_Pos) - 16); + return _First; + } + + _Carry = _Mask; + + _Advance_bytes(_First, 16); + } while (_First != _Stop_at); + + _Mid1 = static_cast(_First); + + unsigned long _Cary_pos; + // CodeQL [SM02313] _Cary_pos is always initialized: (_Carry ^ 0xFFFF) != 0 because if it was, + // _Carry would have been 0xFFFF, which would be a match. + _BitScanReverse(&_Cary_pos, _Carry ^ 0xFFFF); + _Rewind_bytes(_First, 15 - static_cast(_Cary_pos)); + } } #endif // ^^^ !defined(_M_ARM64EC) ^^^ auto _Match_start = static_cast(_First); From af30b3ac13bcaf911e59a059c17213d6c1be822d Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 25 May 2025 14:41:40 +0300 Subject: [PATCH 2/6] Handle AVX tails --- stl/src/vector_algorithms.cpp | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 40f2cfad58..2c1f877f5c 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -3224,6 +3224,53 @@ namespace { _Advance_bytes(_First, 32); } while (_First != _Stop_at); + + if (const size_t _Tail = _Length & 0x1C; _Tail != 0) { + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Tail); + const __m256i _Data = _mm256_maskload_epi32(reinterpret_cast(_First), _Tail_mask); + + const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data); + const uint32_t _Mask = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + + uint64_t _Msk_with_carry = uint64_t{_Carry} | (uint64_t{_Mask} << 32); + uint64_t _MskX = _Msk_with_carry; + + _MskX = (_MskX >> sizeof(_Ty)) & _MskX; + + if constexpr (sizeof(_Ty) == 1) { + _MskX = __ull_rshift(_MskX, _Sh1) & _MskX; + } + + if constexpr (sizeof(_Ty) < 4) { + _MskX = __ull_rshift(_MskX, _Sh2) & _MskX; + } + + if constexpr (sizeof(_Ty) < 8) { + _MskX = __ull_rshift(_MskX, _Sh3) & _MskX; + } + + if (_MskX != 0) { +#ifdef _M_IX86 + const uint32_t _MskLow = static_cast(_MskX); + + const int _Shift = _MskLow != 0 + ? static_cast(_tzcnt_u32(_MskLow)) - 32 + : static_cast(_tzcnt_u32(static_cast(_MskX >> 32))); + +#elifdef _M_X64 + const long long _Shift = static_cast(_tzcnt_u64(_MskX)) - 32; +#else +#error Unsupported architecture +#endif + _Advance_bytes(_First, _Shift); + return _First; + } + + _Carry = static_cast(__ull_rshift(_Msk_with_carry, static_cast(_Tail))); + + _Advance_bytes(_First, _Tail); + } + _Mid1 = static_cast(_First); _Rewind_bytes(_First, _lzcnt_u32(~_Carry)); } else if constexpr (sizeof(_Ty) < 8) { From 2c9f3b471648b1c47ab94218c749a16da72ad10f Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 25 May 2025 15:09:55 +0300 Subject: [PATCH 3/6] +const --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 2c1f877f5c..acd7605b8f 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -3232,8 +3232,8 @@ namespace { const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data); const uint32_t _Mask = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); - uint64_t _Msk_with_carry = uint64_t{_Carry} | (uint64_t{_Mask} << 32); - uint64_t _MskX = _Msk_with_carry; + const uint64_t _Msk_with_carry = uint64_t{_Carry} | (uint64_t{_Mask} << 32); + uint64_t _MskX = _Msk_with_carry; _MskX = (_MskX >> sizeof(_Ty)) & _MskX; From e9b145af3ac0ac5ff7625ccc003d0667df87cb4c Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 30 Sep 2025 13:01:13 -0700 Subject: [PATCH 4/6] Remove extra newline. --- stl/src/vector_algorithms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 11068dcb94..6fae1fcb20 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -3183,7 +3183,6 @@ namespace { _Advance_bytes(_First, 32); } while (_First != _Stop_at); - if (const size_t _Tail = _Length & 0x1C; _Tail != 0) { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Tail); const __m256i _Data = _mm256_maskload_epi32(reinterpret_cast(_First), _Tail_mask); From e17e4d31f801c05946cd44950bc32155e191c146 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 30 Sep 2025 13:01:36 -0700 Subject: [PATCH 5/6] Fix typo: `_Cary_pos` => `_Carry_pos` --- stl/src/vector_algorithms.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 6fae1fcb20..577bb3b47c 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -3278,11 +3278,11 @@ namespace { _Mid1 = static_cast(_First); - unsigned long _Cary_pos; - // CodeQL [SM02313] _Cary_pos is always initialized: (_Carry ^ 0xFFFF) != 0 because if it was, + unsigned long _Carry_pos; + // CodeQL [SM02313] _Carry_pos is always initialized: (_Carry ^ 0xFFFF) != 0 because if it was, // _Carry would have been 0xFFFF, which would be a match. - _BitScanReverse(&_Cary_pos, _Carry ^ 0xFFFF); - _Rewind_bytes(_First, 15 - static_cast(_Cary_pos)); + _BitScanReverse(&_Carry_pos, _Carry ^ 0xFFFF); + _Rewind_bytes(_First, 15 - static_cast(_Carry_pos)); } } #endif // ^^^ !defined(_M_ARM64EC) ^^^ From 2ee5c4a672278942d0d01c5c4a22ac7b7ddac7ea Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 30 Sep 2025 13:16:49 -0700 Subject: [PATCH 6/6] Reword suppression to be a single line with rationale above. --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 577bb3b47c..f134b4b1a6 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -3279,8 +3279,8 @@ namespace { _Mid1 = static_cast(_First); unsigned long _Carry_pos; - // CodeQL [SM02313] _Carry_pos is always initialized: (_Carry ^ 0xFFFF) != 0 because if it was, - // _Carry would have been 0xFFFF, which would be a match. + // Here, _Carry can't be 0xFFFF, because that would have been a match. Therefore: + // CodeQL [SM02313] _Carry_pos is always initialized: `(_Carry ^ 0xFFFF) != 0` is always true. _BitScanReverse(&_Carry_pos, _Carry ^ 0xFFFF); _Rewind_bytes(_First, 15 - static_cast(_Carry_pos)); }