Skip to content

Commit 1efdd89

Browse files
committed
update ch5
1 parent e14dc17 commit 1efdd89

File tree

12 files changed

+454
-2
lines changed

12 files changed

+454
-2
lines changed

ch5/10_TrieSET/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ project(10_TrieSET)
33

44
set(CMAKE_CXX_STANDARD 14)
55

6-
set(SOURCE_FILES main.cpp ../head/LSD.h ../head/MSD.h ../head/TST.h ../head/AmericanFlag.h ../head/AmericanFlagX.h)
6+
set(SOURCE_FILES main.cpp ../head/LSD.h ../head/MSD.h ../head/TST.h ../head/AmericanFlag.h ../head/AmericanFlagX.h ../head/KMP.h ../head/BoyerMoore.h ../head/RabinKarp.h)
77
add_executable(10_TrieSET ${SOURCE_FILES})

ch5/12_KMP/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cmake_minimum_required(VERSION 3.8)
project(12_KMP)

set(CMAKE_CXX_STANDARD 14)

# main.cpp includes only ../head/KMP.h, so that is the header listed with the
# target (the previous list named the unrelated LSD/MSD/TST headers).
set(SOURCE_FILES main.cpp ../head/KMP.h)
add_executable(12_KMP ${SOURCE_FILES})

ch5/12_KMP/main.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#include "../head/KMP.h"
2+
#include <iostream>
3+
#include <fstream>
4+
5+
using namespace std;
6+
7+
/**
8+
* Takes a pattern string and an input string as command-line arguments;
9+
* searches for the pattern string in the text string; and prints
10+
* the first occurrence of the pattern string in the text string.
11+
*
12+
* @param args the command-line arguments
13+
*/
14+
int main() {
15+
string pat = "abracadabra";
16+
string txt = "abacadabrabracabracadabrabrabracad";
17+
vector<char> pattern(pat.c_str(), pat.c_str() + pat.size());
18+
vector<char> text(txt.c_str(), txt.c_str() + txt.size());
19+
20+
KMP kmp1(pat);
21+
int offset1 = kmp1.search(txt);
22+
23+
KMP kmp2(pattern, 256);
24+
int offset2 = kmp2.search(text);
25+
26+
// print results
27+
cout << "text: " << txt << endl;
28+
29+
cout << "pattern: ";
30+
for (int i = 0; i < offset1; i++)
31+
cout << " ";
32+
cout << pat << endl;
33+
34+
cout << "pattern: ";
35+
for (int i = 0; i < offset2; i++)
36+
cout << " ";
37+
cout << pat << endl;
38+
}

ch5/13_BoyerMoore/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cmake_minimum_required(VERSION 3.8)
project(13_BoyerMoore)

set(CMAKE_CXX_STANDARD 14)

# main.cpp includes only ../head/BoyerMoore.h, so that is the header listed
# with the target (the previous list named the unrelated LSD/MSD/TST headers).
set(SOURCE_FILES main.cpp ../head/BoyerMoore.h)
add_executable(13_BoyerMoore ${SOURCE_FILES})

ch5/13_BoyerMoore/main.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#include "../head/BoyerMoore.h"
2+
#include <iostream>
3+
#include <fstream>
4+
5+
using namespace std;
6+
7+
/**
8+
* Takes a pattern string and an input string as command-line arguments;
9+
* searches for the pattern string in the text string; and prints
10+
* the first occurrence of the pattern string in the text string.
11+
*
12+
* @param args the command-line arguments
13+
*/
14+
int main() {
15+
string pat = "abracadabra";
16+
string txt = "abacadabrabracabracadabrabrabracad";
17+
vector<char> pattern(pat.c_str(), pat.c_str() + pat.size());
18+
vector<char> text(txt.c_str(), txt.c_str() + txt.size());
19+
20+
BoyerMoore boyermoore1(pat);
21+
BoyerMoore boyermoore2(pattern, 256);
22+
int offset1 = boyermoore1.search(txt);
23+
int offset2 = boyermoore2.search(text);
24+
25+
// print results
26+
cout << "text: " + txt << endl;
27+
28+
cout << "pattern: ";
29+
for (int i = 0; i < offset1; i++)
30+
cout << " ";
31+
cout << pat << endl;
32+
33+
cout << "pattern: ";
34+
for (int i = 0; i < offset2; i++)
35+
cout << " ";
36+
cout << pat << endl;
37+
38+
}

ch5/14_RabinKarp/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cmake_minimum_required(VERSION 3.8)
project(14_RabinKarp)

set(CMAKE_CXX_STANDARD 14)

# main.cpp includes only ../head/RabinKarp.h, so that is the header listed
# with the target (the previous list named the unrelated LSD/MSD/TST headers).
set(SOURCE_FILES main.cpp ../head/RabinKarp.h)
add_executable(14_RabinKarp ${SOURCE_FILES})

ch5/14_RabinKarp/main.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#include "../head/RabinKarp.h"
2+
#include <iostream>
3+
#include <fstream>
4+
5+
using namespace std;
6+
7+
/**
8+
* Takes a pattern string and an input string as command-line arguments;
9+
* searches for the pattern string in the text string; and prints
10+
* the first occurrence of the pattern string in the text string.
11+
*
12+
* @param args the command-line arguments
13+
*/
14+
int main() {
15+
string pat = "abracadabra";
16+
string txt = "abacadabrabracabracadabrabrabracad";
17+
18+
RabinKarp searcher(pat);
19+
int offset = searcher.search(txt);
20+
21+
// print results
22+
cout << "text: " + txt << endl;
23+
24+
// from brute force search method 1
25+
cout << "pattern: ";
26+
for (int i = 0; i < offset; i++)
27+
cout << " ";
28+
cout << pat << endl;
29+
}

ch5/3_LSD/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ project(3_LSD)
33

44
set(CMAKE_CXX_STANDARD 14)
55

6-
set(SOURCE_FILES main.cpp ../head/LSD.h ../head/MSD.h ../head/InplaceMSD.h)
6+
set(SOURCE_FILES main.cpp ../head/LSD.h ../head/MSD.h ../head/InplaceMSD.h ../head/Quick3string.h ../head/TrieST.h ../head/temp.h ../head/TrieSET.h)
77
add_executable(3_LSD ${SOURCE_FILES})

ch5/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ add_subdirectory(8_AmericanFlagX)
1515
add_subdirectory(9_TrieST)
1616
add_subdirectory(10_TrieSET)
1717
add_subdirectory(11_TST)
18+
add_subdirectory(12_KMP)
19+
add_subdirectory(13_BoyerMoore)
20+
add_subdirectory(14_RabinKarp)
1821

1922

2023
add_subdirectory(temp)

ch5/head/BoyerMoore.h

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#ifndef CH5_BOYERMOORE_H
2+
#define CH5_BOYERMOORE_H
3+
4+
#include <vector>
5+
#include <string>
6+
7+
using std::vector;
8+
using std::string;
9+
10+
/**
 * The BoyerMoore class finds the first occurrence of a pattern string
 * in a text string.
 *
 * This implementation uses the Boyer-Moore algorithm with the bad-character
 * rule, but not the strong good suffix rule.
 *
 * Characters are always interpreted as unsigned byte values (0..255) when
 * they are used as table indices, so bytes >= 0x80 are handled correctly even
 * on platforms where plain char is signed. The pattern is stored once, so
 * either search overload may be used regardless of which constructor built
 * the object.
 *
 * For additional documentation, see Section 5.3 of
 * "Algorithms, 4th Edition" by Robert Sedgewick and Kevin Wayne
 * (https://algs4.cs.princeton.edu/53substring).
 */
class BoyerMoore {
public:
    /**
     * Preprocesses the pattern string over a 256-character alphabet.
     *
     * @param pat the pattern string; an empty pattern matches at offset 0
     */
    BoyerMoore(const std::string &pat)
            : R(256), right(256, -1), pat(pat) {
        indexPattern();
    }

    /**
     * Preprocesses the pattern string over an alphabet of the given size.
     *
     * @param pattern the pattern characters
     * @param R       the alphabet size; a non-positive value yields an empty alphabet
     * @throws std::out_of_range if a pattern character's unsigned value is
     *         not smaller than R
     */
    BoyerMoore(const std::vector<char> &pattern, int R)
            : R(R), right(R > 0 ? R : 0, -1), pat(pattern.begin(), pattern.end()) {
        indexPattern();
    }

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param txt the text string
     * @return the index of the first occurrence of the pattern string
     *         in the text string; n (the text length) if no such match
     */
    int search(const std::string &txt) const {
        return scan(txt.data(), static_cast<int>(txt.length()));
    }

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param text the text characters
     * @return the index of the first occurrence of the pattern string
     *         in the text string; n (the text length) if no such match
     */
    int search(const std::vector<char> &text) const {
        return scan(text.data(), static_cast<int>(text.size()));
    }

private:
    /** Records, for every pattern character, the index of its rightmost occurrence. */
    void indexPattern() {
        const int m = static_cast<int>(pat.length());
        for (int j = 0; j < m; j++) {
            // at() rejects characters that fall outside the alphabet instead
            // of writing past the end of the table.
            right.at(static_cast<unsigned char>(pat[j])) = j;
        }
    }

    /** Rightmost pattern index of ch, or -1 if ch is absent from the pattern or the alphabet. */
    int rightmost(char ch) const {
        const int c = static_cast<unsigned char>(ch);
        return c < R ? right[c] : -1;
    }

    /** Bad-character scan over the n characters starting at t. */
    int scan(const char *t, int n) const {
        const int m = static_cast<int>(pat.length());
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            // Compare right to left; on a mismatch slide the pattern so the
            // offending text character lines up with its rightmost occurrence
            // in the pattern, always advancing by at least one position.
            for (int j = m - 1; j >= 0; j--) {
                if (pat[j] != t[i + j]) {
                    const int shift = j - rightmost(t[i + j]);
                    skip = shift > 1 ? shift : 1;
                    break;
                }
            }
            if (skip == 0) return i;    // found
        }
        return n;                       // not found
    }

    int R;                   // the radix
    std::vector<int> right;  // the bad-character skip array
    std::string pat;         // the pattern (single copy shared by both search overloads)
};
101+
102+
#endif //CH5_BOYERMOORE_H

ch5/head/KMP.h

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#ifndef CH5_KMP_H
2+
#define CH5_KMP_H
3+
4+
#include <vector>
5+
#include <string>
6+
7+
using std::string;
8+
using std::vector;
9+
10+
/**
 * The KMP class finds the first occurrence of a pattern string
 * in a text string.
 *
 * This implementation uses a version of the Knuth-Morris-Pratt substring
 * search algorithm. The version takes time proportional to n + m R in the
 * worst case, where n is the length of the text string, m is the length of
 * the pattern, and R is the alphabet size. It uses extra space proportional
 * to m R.
 *
 * Characters are always interpreted as unsigned byte values (0..255) when
 * they are used as table indices, so bytes >= 0x80 are handled correctly even
 * on platforms where plain char is signed. Only the automaton and the pattern
 * length are stored, so either search overload may be used regardless of
 * which constructor built the object.
 *
 * For additional documentation, see Section 5.3 of
 * "Algorithms, 4th Edition" by Robert Sedgewick and Kevin Wayne
 * (https://algs4.cs.princeton.edu/53substring).
 */
class KMP {
public:
    /**
     * Preprocesses the pattern string over a 256-character alphabet.
     *
     * @param pat the pattern string; an empty pattern matches at offset 0
     */
    KMP(const std::string &pat)
            : R(256),
              m(static_cast<int>(pat.length())),
              dfa(buildDfa(pat.data(), m, R)) {}

    /**
     * Preprocesses the pattern string over an alphabet of the given size.
     *
     * @param pattern the pattern characters
     * @param R       the alphabet size; a non-positive value yields an empty alphabet
     * @throws std::out_of_range if a pattern character's unsigned value is
     *         not smaller than R
     */
    KMP(const std::vector<char> &pattern, int R)
            : R(R),
              m(static_cast<int>(pattern.size())),
              dfa(buildDfa(pattern.data(), m, R)) {}

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param txt the text string
     * @return the index of the first occurrence of the pattern string
     *         in the text string; n (the text length) if no such match
     */
    int search(const std::string &txt) const {
        return run(txt.data(), static_cast<int>(txt.length()));
    }

    /**
     * Returns the index of the first occurrence of the pattern string
     * in the text string.
     *
     * @param text the text characters
     * @return the index of the first occurrence of the pattern string
     *         in the text string; n (the text length) if no such match
     */
    int search(const std::vector<char> &text) const {
        return run(text.data(), static_cast<int>(text.size()));
    }

private:
    /** Maps a character to its non-negative table index. */
    static int code(char ch) {
        return static_cast<unsigned char>(ch);
    }

    /**
     * Builds the KMP automaton for the len characters starting at p:
     * table[c][j] is the state reached from state j on reading character c.
     */
    static std::vector<std::vector<int>> buildDfa(const char *p, int len, int radix) {
        const int rows = radix > 0 ? radix : 0;
        std::vector<std::vector<int>> table(rows, std::vector<int>(len, 0));
        if (len == 0) return table;     // empty pattern: there is no state to wire up

        // at() rejects pattern characters that fall outside the alphabet
        // instead of writing past the end of the table.
        table.at(code(p[0]))[0] = 1;
        for (int x = 0, j = 1; j < len; j++) {
            std::vector<int> &matchRow = table.at(code(p[j]));
            for (int c = 0; c < rows; c++)
                table[c][j] = table[c][x];  // Copy mismatch cases.
            matchRow[j] = j + 1;            // Set match case.
            x = matchRow[x];                // Update restart state.
        }
        return table;
    }

    /** Simulates the automaton on the n characters starting at t. */
    int run(const char *t, int n) const {
        int i = 0;
        int j = 0;
        for (; i < n && j < m; i++) {
            const int c = code(t[i]);
            // A character outside the alphabet cannot occur in the pattern,
            // so it always sends the automaton back to the start state.
            j = c < R ? dfa[c][j] : 0;
        }
        if (j == m) return i - m;   // found
        return n;                   // not found
    }

    // Declaration order matters: the constructors use R and m while building dfa.
    const int R;                        // the radix
    int m;                              // the pattern length
    std::vector<std::vector<int>> dfa;  // the KMP automaton
};
107+
108+
109+
#endif //CH5_KMP_H

0 commit comments

Comments
 (0)