Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse #95

BrandonTang89 · 2021-12-01T08:40:13Z

Considering the following section in /ch6/string_matching.cpp, we call modInverse for every check (we check O(n) possible sub-strings of length m). The comment for hash_fast states it is O(1) but since the extended euclidean algorithm is O(log m), then our total time for string matching is O(n log m).

cpbook-code/ch6/string_matching.cpp

Lines 71 to 97 in c3fb85a

    
           int extEuclid(int a, int b, int &x, int &y) {    // pass x and y by ref 
        
             int xx = y = 0; 
        
             int yy = x = 1; 
        
             while (b) {                                    // repeats until b == 0 
        
               int q = a/b; 
        
               tie(a, b) = tuple(b, a%b); 
        
               tie(x, xx) = tuple(xx, x-q*xx); 
        
               tie(y, yy) = tuple(yy, y-q*yy); 
        
             } 
        
             return a;                                      // returns gcd(a, b) 
        
           } 
        
           int modInverse(int b, int m) {                   // returns b^(-1) (mod m) 
        
             int x, y; 
        
             int d = extEuclid(b, m, x, y);                 // to get b*x + m*y == d 
        
             if (d != 1) return -1;                         // to indicate failure 
        
             // b*x + m*y == 1, now apply (mod m) to get b*x == 1 (mod m) 
        
             return (x+m)%m;                                // this is the answer 
        
           } 
        
           int hash_fast(int L, int R) {                    // O(1) hash of any substr 
        
             if (L == 0) return h[R];                       // h is the prefix hashes 
        
             int ans = 0; 
        
             ans = ((h[R] - h[L-1]) % M + M) % M;           // compute differences 
        
             ans = ((ll)ans * modInverse(Pow[L], M)) % M;   // remove P[L]^-1 (mod M) 
        
             return ans; 
        
           }

We can fix this by pre-computing the inverse of each P[i] using the following implementation in O(log m + n) since we only use modInverse once.

class RollingHash {
   public:
    vi P, H;   // P[i] = p^i mod m, H[i] is the hash of prefix length i
    vi P_inv;  // P_inv[i] = p^(-i) mod m
    const int n;
    string T;
    const ll p, M;

    RollingHash(string _s, int _p = 131, int _M = (int)1e9 + 7)
        : n(_s.size()), T(_s), p(_p), M(_M) {
        PrepareP();
        computeRollingHash();
    }
    void PrepareP() {  // precompute P and P_inv
        P.assign(n, 0);
        P[0] = 1;
        for (int i = 1; i < n; i++) P[i] = (P[i - 1] * p) % M;

        P_inv.assign(n, 0);
        P_inv[n - 1] = modInverse(P[n - 1], M);
        for (int i = n - 2; i >= 0; i--) P_inv[i] = (P_inv[i + 1] * p) % M;
    }

    void computeRollingHash() {  // precompute H
        H.assign(n, 0);
        for (int i = 0; i < n; i++) {
            if (i != 0) H[i] = H[i - 1];
            H[i] = (H[i] + ((ll)T[i] * P[i]) % M) % M;
        }
    }

    int getHash(int l, int r) {  // get hash of substring [l, r]
        if (l == 0) return H[r];
        int ans = ((H[r] - H[l - 1]) % M + M) % M;
        ans = ((ll)ans * P_inv[l]) % M;
        return ans;
    }
};

// Returns a vector of indices of all occurrences of pattern in text
vi rabinKarp(string P, string T) {
    RollingHash P_rolling(P);
    RollingHash T_rolling(T);
    vi matches;

    int n = T.size(), m = P.size();
    int p_hash = P_rolling.getHash(0, m - 1);
    for (int i = 0; i <= n - m; i++) {
        if (p_hash == T_rolling.getHash(i, i + m - 1)) {  // match
            matches.push_back(i);
        }
    }
    return matches;
};

Note that using the original code gets TLE for kattis stringmatching while the modified can AC.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse #95

Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse #95

BrandonTang89 commented Dec 1, 2021 •

edited

Loading

Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse #95

Rabin-Karp implementation runs in O(n log n) due to using extended euclidean algorithm for modInverse #95

Comments

BrandonTang89 commented Dec 1, 2021 • edited Loading

BrandonTang89 commented Dec 1, 2021 •

edited

Loading