williamfiset · williamfiset · Mar 10, 2026 · Mar 10, 2026
diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayMed.java b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArrayMed.java
@@ -1,18 +1,29 @@
+package com.williamfiset.algorithms.datastructures.suffixarray;
+
+import java.util.Arrays;
+
 /**
- * Medium speed suffix array implementation. Time Complexity: O(nlog^2(n))
+ * Medium-speed Suffix Array Construction (Prefix Doubling)
+ *
+ * Builds a suffix array by repeatedly doubling the prefix length used for
+ * ranking. In each round, suffixes are sorted by their first 2^k characters
+ * using the ranks from the previous round as a two-key comparison.
+ *
+ * Compare with SuffixArraySlow (O(n^2*log(n))) for a simpler but slower approach,
+ * and SuffixArrayFast (O(n*log(n))) for an optimized version using radix sort.
+ *
+ * Time:  O(n*log^2(n)) — O(log(n)) doubling rounds, each with O(n*log(n)) sort
+ * Space: O(n)
  *
  * @author William Fiset, william.alexandre.fiset@gmail.com
  */
-package com.williamfiset.algorithms.datastructures.suffixarray;
-
 public class SuffixArrayMed extends SuffixArray {
 
-  // Wrapper class to help sort suffix ranks
-  static class SuffixRankTuple implements Comparable<SuffixRankTuple> {
-
+  // Holds the two-key rank (first half, second half) and original index
+  // for sorting suffixes by their first 2^k characters.
+  private static class SuffixRankTuple implements Comparable<SuffixRankTuple> {
     int firstHalf, secondHalf, originalIndex;
 
-    // Sort Suffix ranks first on the first half then the second half
     @Override
     public int compareTo(SuffixRankTuple other) {
       int cmp = Integer.compare(firstHalf, other.firstHalf);
@@ -34,87 +45,65 @@ public SuffixArrayMed(int[] text) {
     super(text);
   }
 
-  // Construct a suffix array in O(nlog^2(n))
+  /**
+   * Constructs the suffix array {@code sa} using prefix doubling. Each round doubles the
+   * compared offset {@code pos} and re-ranks suffixes; it stops once all N ranks are unique or pos >= N.
+   */
   @Override
   protected void construct() {
     sa = new int[N];
 
-    // Maintain suffix ranks in both a matrix with two rows containing the
-    // current and last rank information as well as some sortable rank objects
+    // Two-row matrix: row 0 = current ranks, row 1 = new ranks (both rows alias one array after round 1)
     int[][] suffixRanks = new int[2][N];
     SuffixRankTuple[] ranks = new SuffixRankTuple[N];
 
-    // Assign a numerical value to each character in the text
+    // Initial ranks are the character values in T; tuple objects are allocated once and reused each round
     for (int i = 0; i < N; i++) {
       suffixRanks[0][i] = T[i];
       ranks[i] = new SuffixRankTuple();
     }
 
-    // O(log(n))
+    // Double the prefix length each round: 1, 2, 4, 8, ... → O(log(n)) rounds
     for (int pos = 1; pos < N; pos *= 2) {
 
+      // Build two-key tuples: (rank at i, rank at i + pos), using -1 when i + pos is past the end
       for (int i = 0; i < N; i++) {
         SuffixRankTuple suffixRank = ranks[i];
         suffixRank.firstHalf = suffixRanks[0][i];
         suffixRank.secondHalf = i + pos < N ? suffixRanks[0][i + pos] : -1;
         suffixRank.originalIndex = i;
       }
 
-      // O(nlog(n))
-      java.util.Arrays.sort(ranks);
+      Arrays.sort(ranks);
 
+      // Assign new ranks in sorted order, starting at 0; equal tuples share the same rank
       int newRank = 0;
       suffixRanks[1][ranks[0].originalIndex] = 0;
 
       for (int i = 1; i < N; i++) {
+        SuffixRankTuple prev = ranks[i - 1];
+        SuffixRankTuple cur = ranks[i];
 
-        SuffixRankTuple lastSuffixRank = ranks[i - 1];
-        SuffixRankTuple currSuffixRank = ranks[i];
+        // Increment rank only when the tuple differs from the previous
+        if (cur.firstHalf != prev.firstHalf || cur.secondHalf != prev.secondHalf)
+          newRank++;
 
-        // If the first half differs from the second half
-        if (currSuffixRank.firstHalf != lastSuffixRank.firstHalf
-            || currSuffixRank.secondHalf != lastSuffixRank.secondHalf) newRank++;
-
-        suffixRanks[1][currSuffixRank.originalIndex] = newRank;
+        suffixRanks[1][cur.originalIndex] = newRank;
       }
 
-      // Place top row (current row) to be the last row
       suffixRanks[0] = suffixRanks[1];
 
-      // Optimization to stop early
+      // newRank == N - 1 means all N ranks are distinct, so the order is final
       if (newRank == N - 1) break;
     }
 
-    // Fill suffix array
     for (int i = 0; i < N; i++) {
       sa[i] = ranks[i].originalIndex;
-      ranks[i] = null;
     }
-
-    // Cleanup
-    suffixRanks[0] = suffixRanks[1] = null;
-    suffixRanks = null;
-    ranks = null;
   }
 
   public static void main(String[] args) {
-
-    // String[] strs = { "AAGAAGC", "AGAAGT", "CGAAGC" };
-    // String[] strs = { "abca", "bcad", "daca" };
-    // String[] strs = { "abca", "bcad", "daca" };
-    // String[] strs = { "AABC", "BCDC", "BCDE", "CDED" };
-    // String[] strs = { "abcdefg", "bcdefgh", "cdefghi" };
-    // String[] strs = { "xxx", "yyy", "zzz" };
-    // TreeSet <String> lcss = SuffixArrayMed.lcs(strs, 2);
-    // System.out.println(lcss);
-
-    // SuffixArrayMed sa = new SuffixArrayMed("abracadabra");
-    // System.out.println(sa);
-    // System.out.println(java.util.Arrays.toString(sa.sa));
-    // System.out.println(java.util.Arrays.toString(sa.lcp));
-
     SuffixArrayMed sa = new SuffixArrayMed("ABBABAABAA");
-    // SuffixArrayMed sa = new SuffixArrayMed("GAGAGAGAGAGAG");
     System.out.println(sa);
   }
 }
diff --git a/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java b/src/main/java/com/williamfiset/algorithms/datastructures/suffixarray/SuffixArraySlow.java
@@ -9,10 +9,10 @@
  * a standard comparison sort, and extracting the sorted indices.
  * Simple to understand but slow for large inputs.
  *
- * Compare with SuffixArrayMed (O(n log^2 n)) and SuffixArrayFast (O(n log n))
+ * Compare with SuffixArrayMed (O(n*log^2(n))) and SuffixArrayFast (O(n*log(n)))
  * to see progressively more efficient construction algorithms.
  *
- * Time:  O(n^2 log n) — sorting is O(n log n) comparisons, each O(n)
+ * Time:  O(n^2*log(n)) — sorting is O(n*log(n)) comparisons, each O(n)
  * Space: O(n)
  *
  * @author William Fiset, william.alexandre.fiset@gmail.com