Skip to content

Add string search utility #143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/ctre/atoms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct any { };

// actual AST of regexp
// Every node below is an empty tag type: it has no members, and whatever it
// describes is carried purely by its template arguments.

// Literal given as a pack of values (one template argument per character).
template <auto... Str> struct string { };
// Same pack shape as `string`. The evaluator overload taking
// ctll::list<string_search<String...>, Tail...> runs evaluate_search_string on
// the pack and continues with Tail from the position that search reports.
template <auto... Str> struct string_search { };
// NOTE(review): presumably an alternation between Opts... - confirm against the parser.
template <typename... Opts> struct select { };
// NOTE(review): presumably Content... matched one after another - confirm against the parser.
template <typename... Content> struct sequence { };
// NOTE(review): presumably the empty pattern - confirm against the parser.
struct empty { };
Expand Down
116 changes: 112 additions & 4 deletions include/ctre/evaluation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,17 @@ template <typename CharT, typename Iterator, typename EndIterator> constexpr CTR
}

// Checks whether the input starting at `current` begins with the characters String...
//
// Template parameters:
//   String... - the literal, one value per character
//   Idx...    - 0 .. sizeof...(String)-1, deduced from the index_sequence argument
// Parameters:
//   current - first input position to compare
//   end     - end of the input
// Returns {position, match}. On a match in the random-access branch the position
// is `current` advanced by sizeof...(String); on a mismatch in that branch it is
// the unmodified `current`. In the generic branch the position is whatever
// compare_character left in `current`.
// NOTE(review): compare_character is assumed to take `current` by reference and
// step it forward - confirm against its definition.
template <auto... String, size_t... Idx, typename Iterator, typename EndIterator> constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_match_string(Iterator current, [[maybe_unused]] const EndIterator end, std::index_sequence<Idx...>) noexcept {
	if constexpr (!std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		using difference_type = typename std::iterator_traits<Iterator>::difference_type;
		// The length test comes first and short-circuits, so every *(current + Idx)
		// below is only read when the whole literal fits into [current, end).
		// The comparison is done in the signed domain to avoid a signed/unsigned mix.
		const bool fits = ::std::distance(current, end) >= static_cast<difference_type>(sizeof...(String));
		// Non-short-circuit `&` keeps the comparison branch-free; the trailing
		// `& true` makes the fold well-formed when the pack is empty.
		const bool same = fits && ((String == *(current + Idx)) & ... & true);
		if (same) {
			return {current += sizeof...(String), same};
		}
		return {current, same};
	} else {
		const bool same = (compare_character(String, current, end) && ... && true);
		return {current, same};
	}
}

template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
Expand All @@ -132,6 +139,107 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
}

// Tells whether the suffix of `word` that starts at index `pos` is also a
// prefix of `word`.
//
// Parameters:
//   word    - pointer to the first character of the word
//   wordlen - number of characters in the word
//   pos     - index at which the suffix starts
// Returns true when word[0 .. wordlen-pos) equals word[pos .. wordlen);
// an empty suffix (pos >= wordlen) yields true.
template<typename Ty>
constexpr bool is_prefix(Ty* word, size_t wordlen, ptrdiff_t pos) {
	// Do the arithmetic in the signed domain so the index and the bound share one type.
	const ptrdiff_t suffixlen = static_cast<ptrdiff_t>(wordlen) - pos;
	for (ptrdiff_t offset = 0; offset < suffixlen; ++offset) {
		if (word[offset] != word[pos + offset]) {
			return false;
		}
	}
	return true;
}

// Counts how many characters agree when walking backwards simultaneously from
// word[pos] and from the last character word[wordlen - 1].
//
// Parameters:
//   word    - pointer to the first character of the word
//   wordlen - number of characters in the word
//   pos     - index where the backwards walk starts
// Returns the number of matching characters; the count is capped at `pos`,
// so index 0 is never counted.
template<typename Ty>
constexpr size_t suffix_length(Ty* word, size_t wordlen, ptrdiff_t pos) {
	const size_t limit = static_cast<size_t>(pos);
	size_t len = 0;
	// Test the bound before dereferencing, so nothing is read once the cap is reached.
	while (len < limit && word[limit - len] == word[wordlen - 1 - len]) {
		++len;
	}
	return len;
}
// Builds the shift table used by evaluate_search_string (Boyer-Moore "delta 2").
//
// Template parameters:
//   Ty        - character type the pattern values are stored as
//   String... - the pattern, one value per character
// Returns std::array<ptrdiff_t, sizeof...(String)>; entry j is the amount the
// caller adds to its text index after a mismatch at pattern index j.
//
// MSVC workaround: std::array::operator[] breaks constant evaluation there, so
// all element access goes through data() and raw pointers instead.
template<typename Ty, auto... String>
constexpr auto make_delta_2(string<String...>) {
	constexpr size_t patlen = sizeof...(String);
	std::array<Ty, patlen> chars{ String... };
	// Value-initialised: a constexpr function may not leave a variable
	// uninitialised before C++20, and no entry must hold an indeterminate value.
	std::array<ptrdiff_t, patlen> table{};
	size_t last_prefix_index = patlen - 1;

	// First pass, from the last pattern index down to 0. Counting with
	// `p-- > 0` avoids relying on unsigned wrap-around to stop the loop.
	for (size_t p = patlen; p-- > 0;) {
		if (is_prefix(chars.data(), patlen, static_cast<ptrdiff_t>(p + 1))) {
			last_prefix_index = p + 1;
		}
		table.data()[p] = static_cast<ptrdiff_t>(last_prefix_index + (patlen - 1 - p));
	}

	// Second pass over every index except the last. `p + 1 < patlen` keeps the
	// loop empty for an empty pattern, where `patlen - 1` would wrap around.
	for (size_t p = 0; p + 1 < patlen; ++p) {
		const size_t slen = suffix_length(chars.data(), patlen, static_cast<ptrdiff_t>(p));
		if (chars.data()[p - slen] != chars.data()[patlen - 1 - slen]) {
			table.data()[patlen - 1 - slen] = static_cast<ptrdiff_t>(patlen - 1 - p + slen);
		}
	}

	return table;
}

// Searches [current, end) for the first occurrence of the literal String...
//
// Parameters:
//   current - position where the search starts
//   end     - end of the input
//   (third) - tag carrying the pattern characters
// Returns {position, match}. On success the position is one past the last
// character of the occurrence. On failure `match` is false.
//
// Three compile-time strategies:
//   * pattern longer than 2, random-access, non-UTF-8 iterator: right-to-left
//     comparison with shifts taken from make_delta_2;
//   * any other non-empty pattern: scan for the first character, then verify
//     with evaluate_match_string;
//   * empty pattern: matches immediately at `current`.
template <typename Iterator, typename EndIterator, auto... String>
constexpr CTRE_FORCE_INLINE string_match_result<Iterator> evaluate_search_string(Iterator current, const EndIterator end, string<String...>) {
	if constexpr (sizeof...(String) > 2 && !std::is_same_v<Iterator, utf8_iterator> && is_random_accessible(typename std::iterator_traits<Iterator>::iterator_category{})) {
		constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
		constexpr std::array<ptrdiff_t, sizeof...(String)> delta_2 = make_delta_2<typename ::std::iterator_traits<Iterator>::value_type>(string<String...>());

		const size_t str_size = static_cast<size_t>(std::distance(current, end));
		if (str_size < sizeof...(String)) { // quick exit, no way to match
			return { current, false };
		}

		size_t i = sizeof...(String) - 1; // text index aligned with the last pattern character
		while (i < str_size) {
			size_t j = sizeof...(String) - 1;
			const size_t m = i + 1; // one past the end of the current alignment
			for (; *(current + i) == *(chars.data() + j); --i, --j) { // match string in reverse
				if (j == 0) {
					return { current + m, true };
				}
			}
			// A text character that does not occur in the pattern lets the whole
			// pattern jump past it; otherwise use the precomputed shift for index j.
			const size_t shift = enumeration<String...>::match_char(*(current + i)) ? static_cast<size_t>(*(delta_2.data() + j)) : sizeof...(String);
			i += shift;
		}

		return { current + str_size, false };
	} else if constexpr (sizeof...(String) > 0) {
		// fallback to plain string matching
		// (`if constexpr` so this body is not instantiated for an empty pattern,
		// where chars.data()[0] would not exist)
		constexpr std::array<typename ::std::iterator_traits<Iterator>::value_type, sizeof...(String)> chars{ String... };
		constexpr typename ::std::iterator_traits<Iterator>::value_type first_char = chars.data()[0];
		while (current != end) {
			while (current != end && *current != first_char) {
				++current;
			}
			if (current == end) {
				// first character never found: stop here instead of stepping past `end`
				break;
			}
			auto result = evaluate_match_string<String...>(current, end, std::make_index_sequence<sizeof...(String)>());
			if (result.match) {
				return result;
			}
			++current;
		}
		return { current, false };
	} else {
		return { current, true };
	}
}

// Evaluator overload for a string_search<String...> node at the head of the pattern list.
//
// Runs evaluate_search_string from `current`; when no occurrence is found the
// whole evaluation yields not_matched, otherwise evaluation continues with
// Tail... from the position the search reported.
//
// Parameters:
//   begin    - start of the whole input, forwarded unchanged
//   current  - position where the search starts
//   end      - end of the input
//   f        - flags, updated via consumed_something for the continuation
//   captures - capture state, forwarded unchanged
template <typename R, typename Iterator, typename EndIterator, auto... String, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, [[maybe_unused]] const flags& f, R captures, ctll::list<string_search<String...>, Tail...>) noexcept {
	const auto result = evaluate_search_string(current, end, string<String...>());

	// string_match_result exposes the flag as `match` (the name the search
	// fallback reads on the same type).
	if (!result.match) {
		return not_matched;
	}

	// The reported position is already one past the matched occurrence, so it
	// is passed on as-is. std::advance returns void and cannot be used as an
	// argument, and advancing again would skip input.
	return evaluate(begin, result.position, end, consumed_something(f, sizeof...(String) > 0), captures, ctll::list<Tail...>());
}

// matching select in patterns
template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... TailOptions, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, const flags & f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {
Expand Down