|
| 1 | +#ifndef CH5_INPLACEMSD_H |
| 2 | +#define CH5_INPLACEMSD_H |
| 3 | + |
| 4 | +#include <vector> |
| 5 | +#include <string> |
| 6 | + |
| 7 | +using std::vector; |
| 8 | +using std::string; |
| 9 | + |
/**
 * The InplaceMSD class provides static methods for sorting an
 * array of extended ASCII strings using in-place MSD radix sort.
 * <p>
 * Strings are ordered byte by byte, every byte being interpreted as an
 * unsigned value in the range [0, 255]; a string that is a proper prefix of
 * another sorts before it. This is the same order produced by
 * std::string::operator<.
 * <p>
 * For additional documentation,
 * see <a href="https://algs4.cs.princeton.edu/51radix">Section 5.1</a> of
 * <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
 *
 * @author Ivan Pesin
 */
class InplaceMSD {
public:
    // Utility class: only static members, never instantiated.
    InplaceMSD() = delete;

    /**
     * Rearranges the array of extended ASCII strings in ascending order.
     * This is an unstable sorting algorithm.
     *
     * @param a the array to be sorted
     */
    static void sort(std::vector<std::string> &a) {
        const int n = static_cast<int>(a.size());
        sort(a, 0, n - 1, 0);
    }

private:
    // Defined in-class as compile-time constants so that including this
    // header from several translation units does not produce multiple
    // definitions of the same symbols.
    static constexpr int R = 256;      // extended ASCII alphabet size
    static constexpr int CUTOFF = 15;  // cutoff to insertion sort

    /**
     * Sorts a[lo..hi] (both bounds inclusive), assuming all strings in that
     * range already agree on their first d characters.
     *
     * Bucket r holds the strings whose d-th character code is r - 1, so
     * bucket 0 is reserved for strings that end exactly at position d.
     *
     * @param a  the array being sorted
     * @param lo index of the first element of the subarray
     * @param hi index of the last element of the subarray
     * @param d  index of the character to sort on
     */
    static void sort(std::vector<std::string> &a, int lo, int hi, int d) {

        // cutoff to insertion sort for small (or empty) subarrays
        if (hi <= lo + CUTOFF) {
            insertion(a, lo, hi, d);
            return;
        }

        // compute frequency counts, shifted by two slots so that the prefix
        // sum below leaves the start of bucket r in heads[r]
        std::vector<int> heads(R + 2);
        std::vector<int> tails(R + 1);
        for (int i = lo; i <= hi; i++) {
            const int c = charAt(a[i], d);
            heads[c + 2]++;
        }

        // transform counts to indices:
        // heads[r] = first slot of bucket r, tails[r] = one past its last slot
        heads[0] = lo;
        for (int r = 0; r < R + 1; r++) {
            heads[r + 1] += heads[r];
            tails[r] = heads[r + 1];
        }

        // sort by d-th character in-place: keep sending the string at the
        // head of bucket r to the next free slot of the bucket it belongs to,
        // until a string that belongs in bucket r lands at the head
        for (int r = 0; r < R + 1; r++) {
            while (heads[r] < tails[r]) {
                int c = charAt(a[heads[r]], d);
                while (c + 1 != r) {
                    const int dest = heads[c + 1]++;
                    a[heads[r]].swap(a[dest]);
                    c = charAt(a[heads[r]], d);
                }
                heads[r]++;
            }
        }

        // recursively sort for each character (excludes sentinel -1);
        // tails[r] is also the start of bucket r + 1
        for (int r = 0; r < R; r++)
            sort(a, tails[r], tails[r + 1] - 1, d + 1);
    }

    /**
     * Insertion sort of a[lo..hi] (inclusive), comparing strings from the
     * d-th character onwards. Does nothing when hi < lo.
     */
    static void insertion(std::vector<std::string> &a, int lo, int hi, int d) {
        for (int i = lo; i <= hi; i++)
            for (int j = i; j > lo && less(a[j], a[j - 1], d); j--)
                a[j].swap(a[j - 1]);
    }

    /**
     * Is v less than w, starting at character d?
     *
     * Characters are compared as unsigned bytes so that the result agrees
     * with the bucket order used by the radix pass even on platforms where
     * plain char is signed.
     */
    static bool less(const std::string &v, const std::string &w, int d) {
        const std::string::size_type limit =
                v.length() < w.length() ? v.length() : w.length();
        for (std::string::size_type i = static_cast<std::string::size_type>(d);
             i < limit; i++) {
            const unsigned char x = static_cast<unsigned char>(v[i]);
            const unsigned char y = static_cast<unsigned char>(w[i]);
            if (x < y) return true;
            if (x > y) return false;
        }
        return v.length() < w.length();
    }

    /**
     * Returns the code (0..R-1) of the d-th character of s, or -1 when s has
     * no character at position d.
     *
     * The byte is read as unsigned char: a plain (possibly signed) char
     * would turn bytes >= 0x80 into negative bucket indices.
     */
    static int charAt(const std::string &s, int d) {
        if (static_cast<std::string::size_type>(d) >= s.length()) return -1;
        return static_cast<unsigned char>(s[d]);
    }
};
| 107 | + |
| 108 | +#endif //CH5_INPLACEMSD_H |
0 commit comments