Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
package com.williamfiset.algorithms.datastructures.suffixarray;

import java.util.Arrays;

/**
* Medium-speed Suffix Array Construction (Prefix Doubling)
*
* Builds a suffix array by repeatedly doubling the prefix length used for
* ranking. In each round, suffixes are sorted by their first 2^k characters
* using the ranks from the previous round as a two-key comparison.
*
* Compare with SuffixArraySlow (O(n^2 log n)) for a simpler but slower approach,
* and SuffixArrayFast (O(n*log(n))) for an optimized version using radix sort.
*
* Time: O(n*log^2(n)) — O(log(n)) doubling rounds, each with O(n*log(n)) sort
* Space: O(n)
*
* @author William Fiset, william.alexandre.fiset@gmail.com
*/
package com.williamfiset.algorithms.datastructures.suffixarray;

public class SuffixArrayMed extends SuffixArray {

// Wrapper class to help sort suffix ranks
static class SuffixRankTuple implements Comparable<SuffixRankTuple> {

// Holds the two-key rank (first half, second half) and original index
// for sorting suffixes by their first 2^k characters.
private static class SuffixRankTuple implements Comparable<SuffixRankTuple> {
int firstHalf, secondHalf, originalIndex;

// Sort Suffix ranks first on the first half then the second half
@Override
public int compareTo(SuffixRankTuple other) {
int cmp = Integer.compare(firstHalf, other.firstHalf);
Expand All @@ -34,87 +45,65 @@ public SuffixArrayMed(int[] text) {
super(text);
}

/**
 * Constructs the suffix array using prefix doubling. Each round sorts the
 * suffixes by a two-key tuple (rank of the first half, rank of the second
 * half), doubling the compared prefix length, and stops as soon as every
 * suffix has a unique rank.
 *
 * <p>Time: O(n*log^2(n)) — O(log(n)) doubling rounds, each dominated by an
 * O(n*log(n)) comparison sort.
 *
 * <p>When N is less than 2 the doubling loop never runs; the freshly created
 * tuples keep their default originalIndex of 0, which is already the correct
 * answer for a single suffix.
 */
@Override
protected void construct() {
  sa = new int[N];

  // Two-row matrix: row 0 = current ranks, row 1 = new ranks
  int[][] suffixRanks = new int[2][N];
  SuffixRankTuple[] ranks = new SuffixRankTuple[N];

  // Initial ranks are the character values themselves
  for (int i = 0; i < N; i++) {
    suffixRanks[0][i] = T[i];
    ranks[i] = new SuffixRankTuple();
  }

  // Double the prefix length each round: 1, 2, 4, 8, ... → O(log(n)) rounds
  for (int pos = 1; pos < N; pos *= 2) {

    // Build two-key tuples: (rank of first half, rank of second half).
    // A suffix too short to have a second half gets -1 so it sorts first.
    for (int i = 0; i < N; i++) {
      SuffixRankTuple suffixRank = ranks[i];
      suffixRank.firstHalf = suffixRanks[0][i];
      suffixRank.secondHalf = i + pos < N ? suffixRanks[0][i + pos] : -1;
      suffixRank.originalIndex = i;
    }

    // O(n*log(n)) — sort exactly once per round
    Arrays.sort(ranks);

    // Assign new ranks based on sorted order
    int newRank = 0;
    suffixRanks[1][ranks[0].originalIndex] = 0;

    for (int i = 1; i < N; i++) {
      SuffixRankTuple prev = ranks[i - 1];
      SuffixRankTuple cur = ranks[i];

      // Increment the rank exactly once when the tuple differs from the
      // previous one; the early-exit test below relies on newRank being
      // the number of distinct tuples minus one.
      if (cur.firstHalf != prev.firstHalf || cur.secondHalf != prev.secondHalf) {
        newRank++;
      }

      suffixRanks[1][cur.originalIndex] = newRank;
    }

    // Promote the new ranks to be the current ranks. After this both rows
    // share one array, which is safe because each round copies the ranks it
    // needs into the tuples before any rank is overwritten.
    suffixRanks[0] = suffixRanks[1];

    // All ranks unique means sorting is complete
    if (newRank == N - 1) {
      break;
    }
  }

  // Fill suffix array from the final sorted order
  for (int i = 0; i < N; i++) {
    sa[i] = ranks[i].originalIndex;
  }
}

/**
 * Small demo driver: builds the suffix array for a sample string and prints
 * the resulting object.
 *
 * <p>Other sample inputs previously tried here include "abracadabra" and
 * "GAGAGAGAGAGAG".
 */
public static void main(String[] args) {
  System.out.println(new SuffixArrayMed("ABBABAABAA"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
* a standard comparison sort, and extracting the sorted indices.
* Simple to understand but slow for large inputs.
*
* Compare with SuffixArrayMed (O(n*log^2(n))) and SuffixArrayFast (O(n*log(n)))
* to see progressively more efficient construction algorithms.
*
* Time: O(n^2*log(n)) — sorting is O(n*log(n)) comparisons, each O(n)
* Space: O(n)
*
* @author William Fiset, william.alexandre.fiset@gmail.com
Expand Down
Loading