Skip to content

Commit ed9f8e4

Browse files
committed
add ch5
1 parent 1c27e80 commit ed9f8e4

File tree

13 files changed

+540
-0
lines changed

13 files changed

+540
-0
lines changed

README.md

+8
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,11 @@ Algorithms, 4th edition textbook code (using c++)
9595
| [-](https://algs4.cs.princeton.edu/44sp/index.php#-) | [Arbitrage.cpp](ch4/49_Arbitrage/main.cpp) | arbitrage detection | [-](https://algs4.cs.princeton.edu/44sp/index.php#-) | [FloydWarshall.h](ch4/head/FloydWarshall.h) | all-pairs shortest paths (dense) |
9696
| [-](https://algs4.cs.princeton.edu/44sp/index.php#-) | [AdjMatrixEdgeWeightedDigraph.h](ch4/head/AdjMatrixEdgeWeightedDigraph.h) | edge-weighted graph (dense) | | | |
9797

98+
## ch5. Strings
99+
100+
| REF | PROGRAM | DESCRIPTION / C++DOC | REF | PROGRAM | DESCRIPTION / C++DOC |
101+
| :---------------------------------------------------------: | :----------------------------------------------------------: | :----------------------: | :---------------------------------------------------------: | :----------------------------------------------------------: | :------------------: |
102+
| [-](https://algs4.cs.princeton.edu/50strings/index.php#-) | [Alphabet.java](https://algs4.cs.princeton.edu/50strings/Alphabet.java.html) | alphabet | [-](https://algs4.cs.princeton.edu/50strings/index.php#-) | [Count.java](https://algs4.cs.princeton.edu/50strings/Count.java.html) | alphabet client |
103+
| [5.1](https://algs4.cs.princeton.edu/51radix/index.php#5.1) | [LSD.h](ch5/head/LSD.h) | LSD radix sort | [5.2](https://algs4.cs.princeton.edu/51radix/index.php#5.2) | [MSD.h](ch5/head/MSD.h) | MSD radix sort |
104+
| [-](https://algs4.cs.princeton.edu/51radix/index.php#-) | [InplaceMSD.h](ch5/head/InplaceMSD.h) | In-place MSD radix sort | | | |
105+

ch5/3_LSD/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cmake_minimum_required(VERSION 3.8)
project(3_LSD)

set(CMAKE_CXX_STANDARD 14)

# Builds the LSD radix sort demo. Only the header that main.cpp actually
# includes is listed; headers are named here purely so IDEs show them as part
# of the target.
set(SOURCE_FILES main.cpp ../head/LSD.h)
add_executable(3_LSD ${SOURCE_FILES})

ch5/3_LSD/main.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "../head/LSD.h"
2+
#include <iostream>
3+
#include <fstream>
4+
5+
using namespace std;
6+
7+
/**
8+
* Reads in a sequence of fixed-length strings from standard input;
9+
* LSD radix sorts them;
10+
* and prints them to standard output in ascending order.
11+
*
12+
* @param args the command-line arguments
13+
*/
14+
int main() {
15+
string file = "/home/ace/AceDev/C++/algorithm/ch5/data/words3.txt";
16+
fstream in(file);
17+
string tmp;
18+
vector<string> a;
19+
while (in >> tmp)
20+
a.push_back(tmp);
21+
22+
int n = a.size();
23+
int w = a[0].length();
24+
25+
// sort the strings
26+
LSD::sort(a, w);
27+
28+
// print results
29+
for (int i = 0; i < n; ++i)
30+
cout << a[i] << endl;
31+
}

ch5/4_MSD/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cmake_minimum_required(VERSION 3.8)
project(4_MSD)

set(CMAKE_CXX_STANDARD 14)

# Builds the MSD radix sort demo. Only the header that main.cpp actually
# includes is listed; headers are named here purely so IDEs show them as part
# of the target.
set(SOURCE_FILES main.cpp ../head/MSD.h)
add_executable(4_MSD ${SOURCE_FILES})

ch5/4_MSD/main.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "../head/MSD.h"
2+
#include <iostream>
3+
#include <fstream>
4+
5+
using namespace std;
6+
7+
/**
8+
* Reads in a sequence of extended ASCII strings from standard input;
9+
* MSD radix sorts them;
10+
* and prints them to standard output in ascending order.
11+
*
12+
* @param args the command-line arguments
13+
*/
14+
int main() {
15+
string file = "/home/ace/AceDev/C++/algorithm/ch5/data/shells.txt";
16+
fstream in(file);
17+
string tmp;
18+
vector<string> a;
19+
while (in >> tmp)
20+
a.push_back(tmp);
21+
22+
int n = a.size();
23+
int w = a[0].length();
24+
25+
// sort the strings
26+
MSD::sort(a);
27+
28+
// print results
29+
for (int i = 0; i < n; ++i)
30+
cout << a[i] << endl;
31+
}

ch5/5_InplaceMSD/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
cmake_minimum_required(VERSION 3.8)
project(5_InplaceMSD)

set(CMAKE_CXX_STANDARD 14)

# Builds the in-place MSD radix sort demo. main.cpp includes InplaceMSD.h, so
# that is the header listed with the target (headers are named here purely so
# IDEs show them as part of the target).
set(SOURCE_FILES main.cpp ../head/InplaceMSD.h)
add_executable(5_InplaceMSD ${SOURCE_FILES})

ch5/5_InplaceMSD/main.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "../head/InplaceMSD.h"
2+
#include <iostream>
3+
#include <fstream>
4+
5+
using namespace std;
6+
7+
/**
8+
* Reads in a sequence of extended ASCII strings from standard input;
9+
* in-place MSD radix sorts them;
10+
* and prints them to standard output in ascending order.
11+
*
12+
* @param args the command-line arguments
13+
*/
14+
int main() {
15+
string file = "/home/ace/AceDev/C++/algorithm/ch5/data/shells.txt";
16+
fstream in(file);
17+
string tmp;
18+
vector<string> a;
19+
while (in >> tmp)
20+
a.push_back(tmp);
21+
22+
int n = a.size();
23+
int w = a[0].length();
24+
25+
// sort the strings
26+
InplaceMSD::sort(a);
27+
28+
// print results
29+
for (int i = 0; i < n; ++i)
30+
cout << a[i] << endl;
31+
}

ch5/CMakeLists.txt

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
cmake_minimum_required(VERSION 3.8)
project(ch5)

set(CMAKE_CXX_STANDARD 14)

# Shared algorithm headers live in head/. The data/ directory only holds
# text input files, so it is not added to the include path.
include_directories(head)

# One sub-project per demo program.
add_subdirectory(3_LSD)
add_subdirectory(4_MSD)
add_subdirectory(5_InplaceMSD)
12+

ch5/data/shells.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
she sells seashells by the sea shore
2+
the shells she sells are surely seashells

ch5/data/words3.txt

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
bed bug dad yes zoo
2+
now for tip ilk dim
3+
tag jot sob nob sky
4+
hut men egg few jay
5+
owl joy rap gig wee
6+
was wad fee tap tar
7+
dug jam all bad yet

ch5/head/InplaceMSD.h

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#ifndef CH5_INPLACEMSD_H
2+
#define CH5_INPLACEMSD_H
3+
4+
#include <vector>
5+
#include <string>
6+
7+
using std::vector;
8+
using std::string;
9+
10+
/**
11+
* The {@code InplaceMSD} class provides static methods for sorting an
12+
* array of extended ASCII strings using in-place MSD radix sort.
13+
* <p>
14+
* For additional documentation,
15+
* see <a href="https://algs4.cs.princeton.edu/51radix">Section 5.1</a> of
16+
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
17+
*
18+
* @author Ivan Pesin
19+
*/
20+
class InplaceMSD {
public:
    // do not instantiate
    InplaceMSD() = delete;

    /**
     * Rearranges the array of extended ASCII strings in ascending order.
     * Characters are ordered by their unsigned byte value (0..255).
     * This is an unstable sorting algorithm.
     *
     * @param a the array to be sorted
     */
    static void sort(std::vector<std::string> &a) {
        const int n = static_cast<int>(a.size());
        sort(a, 0, n - 1, 0);
    }

private:
    // In-class constexpr constants instead of header-defined static members:
    // the latter produce duplicate definitions when this header is included
    // from more than one translation unit. They are only ever used by value.
    static constexpr int R = 256;      // extended ASCII alphabet size
    static constexpr int CUTOFF = 15;  // cutoff to insertion sort

    // sort from a[lo] to a[hi], starting at the dth character;
    // all strings in a[lo..hi] must agree on their first d characters
    static void sort(std::vector<std::string> &a, int lo, int hi, int d) {

        // cutoff to insertion sort for small subarrays
        if (hi <= lo + CUTOFF) {
            insertion(a, lo, hi, d);
            return;
        }

        // compute frequency counts; bucket r holds character r - 1, so
        // bucket 0 is the end-of-string sentinel (-1)
        std::vector<int> heads(R + 2);
        std::vector<int> tails(R + 1);
        for (int i = lo; i <= hi; i++)
            heads[charAt(a[i], d) + 2]++;

        // transform counts to indices: heads[r] is the first slot of
        // bucket r, tails[r] is one past its last slot
        heads[0] = lo;
        for (int r = 0; r < R + 1; r++) {
            heads[r + 1] += heads[r];
            tails[r] = heads[r + 1];
        }

        // sort by d-th character in-place: keep sending the string at the
        // head of bucket r to the bucket it belongs to until a string that
        // belongs in bucket r lands there
        for (int r = 0; r < R + 1; r++) {
            while (heads[r] < tails[r]) {
                int c = charAt(a[heads[r]], d);
                while (c + 1 != r) {
                    const int dest = heads[c + 1]++;
                    a[heads[r]].swap(a[dest]);
                    c = charAt(a[heads[r]], d);
                }
                heads[r]++;
            }
        }

        // recursively sort for each character (excludes sentinel -1)
        for (int r = 0; r < R; r++)
            sort(a, tails[r], tails[r + 1] - 1, d + 1);
    }

    // insertion sort a[lo..hi], starting at dth character
    static void insertion(std::vector<std::string> &a, int lo, int hi, int d) {
        for (int i = lo; i <= hi; i++)
            for (int j = i; j > lo && less(a[j], a[j - 1], d); j--)
                a[j].swap(a[j - 1]);
    }

    // is v less than w, starting at character d (both must have at least d
    // characters). std::string::compare orders characters as unsigned bytes,
    // which matches the bucket order used by charAt, and treats a proper
    // prefix as smaller.
    static bool less(const std::string &v, const std::string &w, int d) {
        const std::string::size_type from = static_cast<std::string::size_type>(d);
        return v.compare(from, std::string::npos, w, from, std::string::npos) < 0;
    }

    // return dth character of s as a value in 0..255, or -1 if d is at (or
    // past) the end of the string. The unsigned char cast keeps bytes >= 0x80
    // from turning into negative bucket indices where char is signed.
    static int charAt(const std::string &s, int d) {
        if (d >= static_cast<int>(s.length())) return -1;
        return static_cast<unsigned char>(s[d]);
    }
};
107+
108+
#endif //CH5_INPLACEMSD_H

ch5/head/LSD.h

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#ifndef CH5_LSD_H
2+
#define CH5_LSD_H
3+
4+
#include <string>
5+
#include <vector>
6+
7+
using std::string;
8+
using std::vector;
9+
10+
/**
11+
* The {@code LSD} class provides static methods for sorting an
12+
* array of <em>w</em>-character strings or 32-bit integers using LSD radix sort.
13+
* <p>
14+
* For additional documentation,
15+
* see <a href="https://algs4.cs.princeton.edu/51radix">Section 5.1</a> of
16+
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
17+
*
18+
* @author Robert Sedgewick
19+
* @author Kevin Wayne
20+
*/
21+
class LSD {
public:
    // do not instantiate
    LSD() = delete;

    /**
     * Rearranges the array of W-character strings in ascending order.
     * Characters are ordered by their unsigned byte value (0..255).
     * The sort is stable.
     *
     * Precondition: every string in {@code a} has at least {@code w}
     * characters; shorter strings are indexed out of range.
     *
     * @param a the array to be sorted
     * @param w the number of characters per string
     */
    static void sort(std::vector<std::string> &a, int w) {
        const int n = static_cast<int>(a.size());
        const int R = 256;   // extended ASCII alphabet size
        std::vector<std::string> aux(n);

        for (int d = w - 1; d >= 0; d--) {
            // sort by key-indexed counting on dth character

            // compute frequency counts; the unsigned char cast keeps bytes
            // >= 0x80 from becoming negative indices where char is signed
            std::vector<int> count(R + 1);
            for (int i = 0; i < n; i++)
                count[static_cast<unsigned char>(a[i][d]) + 1]++;

            // compute cumulates
            for (int r = 0; r < R; r++)
                count[r + 1] += count[r];

            // move data; swapping instead of assigning avoids copying the
            // string contents (a[i] is not read again during this pass)
            for (int i = 0; i < n; i++)
                aux[count[static_cast<unsigned char>(a[i][d])]++].swap(a[i]);

            // copy back (again by swapping)
            for (int i = 0; i < n; i++)
                a[i].swap(aux[i]);
        }
    }

    /**
     * Rearranges the array of 32-bit integers in ascending order.
     * Assumes {@code int} is 32 bits wide.
     *
     * @param a the array to be sorted
     */
    static void sort(std::vector<int> &a) {
        const int BITS = 32;                 // each int is 32 bits
        const int R = 1 << BITS_PER_BYTE;    // each byte is between 0 and 255
        const int MASK = R - 1;              // 0xFF
        const int w = BITS / BITS_PER_BYTE;  // each int is 4 bytes

        const int n = static_cast<int>(a.size());
        std::vector<int> aux(n);

        for (int d = 0; d < w; d++) {

            // compute frequency counts
            std::vector<int> count(R + 1);
            for (int i = 0; i < n; i++)
                count[byteAt(a[i], d, MASK) + 1]++;

            // compute cumulates
            for (int r = 0; r < R; r++)
                count[r + 1] += count[r];

            // for most significant byte, 0x80-0xFF comes before 0x00-0x7F
            // (those are the negative values in two's complement)
            if (d == w - 1) {
                const int shift1 = count[R] - count[R / 2];
                const int shift2 = count[R / 2];
                for (int r = 0; r < R / 2; r++)
                    count[r] += shift1;
                for (int r = R / 2; r < R; r++)
                    count[r] -= shift2;
            }

            // move data
            for (int i = 0; i < n; i++)
                aux[count[byteAt(a[i], d, MASK)]++] = a[i];

            // copy back
            for (int i = 0; i < n; i++)
                a[i] = aux[i];
        }
    }

private:
    // In-class constexpr constant instead of a header-defined static member:
    // the latter produces duplicate definitions when this header is included
    // from more than one translation unit. It is only ever used by value.
    static constexpr int BITS_PER_BYTE = 8;

    // return the dth byte (0 = least significant) of value; the shift is done
    // on the unsigned representation because right-shifting a negative int is
    // implementation-defined before C++20
    static int byteAt(int value, int d, int mask) {
        return static_cast<int>(
                (static_cast<unsigned int>(value) >> (BITS_PER_BYTE * d)) & mask);
    }
};
115+
116+
#endif //CH5_LSD_H

0 commit comments

Comments
 (0)