Skip to content

Commit d14604d

Browse files
Adds regex_dfa library (repurposed from klex project)
Signed-off-by: Christian Parpart <[email protected]>
1 parent ae0fc93 commit d14604d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+10427
-0
lines changed

Diff for: .clang-format

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ IncludeCategories:
7777
Priority: 3
7878
- Regex: '^<(vtrasterizer)/'
7979
Priority: 4
80+
- Regex: '^<(regex_dfa)/'
81+
Priority: 5
8082
- Regex: '^<(text_shaper)/'
8183
Priority: 5
8284
- Regex: '^<(crispy)/'

Diff for: src/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ endif()
55
include(PedanticCompiler)
66

77
add_subdirectory(crispy)
8+
add_subdirectory(regex_dfa)
89
add_subdirectory(text_shaper)
910
add_subdirectory(vtpty)
1011
add_subdirectory(vtparser)

Diff for: src/regex_dfa/Alphabet.cpp

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// This file is part of the "klex" project, <http://github.com/christianparpart/klex>
2+
// (c) 2018 Christian Parpart <[email protected]>
3+
//
4+
// Licensed under the MIT License (the "License"); you may not use this
5+
// file except in compliance with the License. You may obtain a copy of
6+
// the License at: http://opensource.org/licenses/MIT
7+
8+
#include <regex_dfa/Alphabet.h>
9+
#include <regex_dfa/Symbols.h>
10+
11+
#include <iomanip>
12+
#include <iostream>
13+
#include <sstream>
14+
15+
using namespace std;
16+
17+
namespace regex_dfa
18+
{
19+
20+
// File-local tracing helper.
//
// Change the condition below to 1 to write the formatted message, followed by a
// newline, to cerr. While it stays 0 the macro expands to an empty statement, so
// its arguments are never evaluated.
#if 0
    #define DEBUG(msg, ...) do { cerr << fmt::format(msg, __VA_ARGS__) << "\n"; } while (false)
#else
    #define DEBUG(msg, ...) do { } while (false)
#endif
32+
33+
/**
 * Adds @p ch to the alphabet.
 *
 * Inserting a symbol that is already present leaves the alphabet unchanged;
 * the debug trace is only emitted for symbols that were actually added.
 */
void Alphabet::insert(Symbol ch)
{
    // std::set::insert() already reports whether the element was new, so a
    // separate find() beforehand would only walk the tree a second time.
    const bool added = alphabet_.insert(ch).second;
    if (added)
        DEBUG("Alphabet: insert '{:}'", prettySymbol(ch));
}
41+
42+
/**
 * Renders the alphabet as text: an opening brace, the prettySymbol() output of
 * every symbol in the set's iteration order (no separators), and a closing brace.
 */
string Alphabet::to_string() const
{
    stringstream rendered;
    rendered << '{';

    for (auto it = alphabet_.begin(); it != alphabet_.end(); ++it)
    {
        Symbol symbol = *it;
        rendered << prettySymbol(symbol);
    }

    rendered << '}';
    return rendered.str();
}
55+
56+
} // namespace regex_dfa

Diff for: src/regex_dfa/Alphabet.h

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// This file is part of the "klex" project, <http://github.com/christianparpart/klex>
2+
// (c) 2018 Christian Parpart <[email protected]>
3+
//
4+
// Licensed under the MIT License (the "License"); you may not use this
5+
// file except in compliance with the License. You may obtain a copy of
6+
// the License at: http://opensource.org/licenses/MIT
7+
#pragma once
8+
9+
#include <regex_dfa/Symbols.h>
10+
11+
#include <fmt/format.h>
12+
13+
#include <set>
14+
#include <string>
15+
16+
namespace regex_dfa
17+
{
18+
19+
/**
20+
* Represents the alphabet of a finite automaton or regular expression.
21+
*/
22+
class Alphabet
23+
{
24+
public:
25+
using set_type = std::set<Symbol>;
26+
using iterator = set_type::iterator;
27+
28+
size_t size() const noexcept { return alphabet_.size(); }
29+
30+
void insert(Symbol ch);
31+
32+
std::string to_string() const;
33+
34+
const iterator begin() const { return alphabet_.begin(); }
35+
const iterator end() const { return alphabet_.end(); }
36+
37+
private:
38+
set_type alphabet_;
39+
};
40+
41+
} // namespace regex_dfa
42+
43+
namespace fmt
44+
{
45+
template <>
46+
struct formatter<regex_dfa::Alphabet>
47+
{
48+
template <typename ParseContext>
49+
constexpr auto parse(ParseContext& ctx)
50+
{
51+
return ctx.begin();
52+
}
53+
54+
template <typename FormatContext>
55+
constexpr auto format(const regex_dfa::Alphabet& v, FormatContext& ctx)
56+
{
57+
return format_to(ctx.out(), "{}", v.to_string());
58+
}
59+
};
60+
} // namespace fmt

Diff for: src/regex_dfa/CMakeLists.txt

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# The regex_dfa static library.
add_library(regex_dfa STATIC
    Alphabet.cpp
    Compiler.cpp
    DFA.cpp
    DFABuilder.cpp
    DFAMinimizer.cpp
    DotWriter.cpp
    MultiDFA.cpp
    NFA.cpp
    NFABuilder.cpp
    RegExpr.cpp
    RegExprParser.cpp
    RuleParser.cpp
    State.cpp
    Symbols.cpp
    Report.cpp
    SourceLocation.cpp
)

# Headers are included as <regex_dfa/...>, so the src/ directory is exported.
target_include_directories(regex_dfa
    PUBLIC
        ${PROJECT_SOURCE_DIR}/src
        ${CMAKE_SOURCE_DIR}/src
)
target_link_libraries(regex_dfa PUBLIC fmt::fmt-header-only)

# ----------------------------------------------------------------------------
# Test executable, only built when TESTS is enabled.
if(TESTS)
    add_executable(regex_dfa_test
        regex_dfa_test.cpp
        DFABuilder_test.cpp
        DotWriter_test.cpp
        Lexer_test.cpp
        NFA_test.cpp
        RegExprParser_test.cpp
        RuleParser_test.cpp
        State_test.cpp
        Symbols_test.cpp
        util/iterator_test.cpp
        util/testing.cpp
    )

    target_link_libraries(regex_dfa_test PUBLIC regex_dfa fmt::fmt-header-only)
endif()

Diff for: src/regex_dfa/CharStream.h

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// This file is part of the "klex" project, <http://github.com/christianparpart/klex>
2+
// (c) 2018 Christian Parpart <[email protected]>
3+
//
4+
// Licensed under the MIT License (the "License"); you may not use this
5+
// file except in compliance with the License. You may obtain a copy of
6+
// the License at: http://opensource.org/licenses/MIT
7+
#pragma once
8+
9+
#include <iosfwd>
10+
#include <istream>
11+
#include <string>
12+
13+
namespace regex_dfa
14+
{
15+
16+
/**
 * Abstract character source with a read position that can be moved backwards.
 */
class CharStream
{
  public:
    virtual ~CharStream() = default;

    /// Tells whether the stream has no further characters to deliver.
    [[nodiscard]] virtual bool isEof() const noexcept = 0;

    /// Returns the character at the read position and advances past it.
    virtual char get() = 0;

    /// Moves the read position back by @p count characters.
    virtual void rollback(int count) = 0;

    /// Moves the read position back to the start of the stream.
    virtual void rewind() = 0;
};

/**
 * CharStream over an in-memory string that the stream object owns.
 */
class StringStream: public CharStream
{
  public:
    /// Stores @p s as the text to read. The parameter is taken by value so
    /// that both lvalues (copied) and rvalues (moved) are accepted.
    explicit StringStream(std::string s): source_ { std::move(s) } {}

    /// True once the read position is at or beyond the end of the text.
    [[nodiscard]] bool isEof() const noexcept override { return pos_ >= source_.size(); }

    /// Returns the next character, or '\0' when reading at or past the end.
    ///
    /// The position is advanced in either case, so a following rollback()
    /// undoes exactly as many get() calls as it is told to. Indexing the
    /// string is guarded because operator[] is only defined up to size().
    char get() override
    {
        const char ch = pos_ < source_.size() ? source_[pos_] : '\0';
        ++pos_;
        return ch;
    }

    /// Steps back by @p count characters, stopping at the start of the text.
    ///
    /// pos_ is unsigned, so subtracting more than its current value would
    /// wrap around to a huge offset; the result is clamped to 0 instead.
    /// A negative @p count moves forward, as the plain subtraction used to.
    void rollback(int count) override
    {
        if (count < 0)
        {
            pos_ += static_cast<size_t>(-static_cast<long long>(count));
            return;
        }

        const auto distance = static_cast<size_t>(count);
        pos_ = distance < pos_ ? pos_ - distance : 0;
    }

    /// Restarts reading from the first character.
    void rewind() override { pos_ = 0; }

  private:
    std::string source_;
    size_t pos_ = 0;
};
41+
42+
class StandardStream: public CharStream
43+
{
44+
public:
45+
explicit StandardStream(std::istream* source);
46+
47+
[[nodiscard]] bool isEof() const noexcept override { return !source_->good(); }
48+
char get() override { return static_cast<char>(source_->get()); }
49+
50+
void rollback(int count) override
51+
{
52+
source_->clear();
53+
source_->seekg(-count, std::ios::cur);
54+
}
55+
56+
void rewind() override
57+
{
58+
source_->clear();
59+
source_->seekg(initialOffset_, std::ios::beg);
60+
}
61+
62+
private:
63+
std::istream* source_;
64+
std::streamoff initialOffset_;
65+
};
66+
67+
} // namespace regex_dfa

0 commit comments

Comments
 (0)