Skip to content

Commit 881ebd1

Browse files
committed
fix rk
1 parent cbef9d6 commit 881ebd1

File tree

1 file changed

+68
-69
lines changed

1 file changed

+68
-69
lines changed

CMP201StringSearch/CMP201StringSearch.cpp

+68 -69
Original file line number | Diff line number | Diff line change
@@ -117,12 +117,12 @@ void StopClock() {
117117
void ShowTimeTaken() {
118118
std::cout << "\nTime taken: ";
119119
std::cout << std::chrono::duration_cast<std::chrono::microseconds>(timerEnd - timerStart).count();
120-
std::cout << "ms\n";
120+
std::cout << "us\n";
121121
}
122122

123123
std::string results = "sample size,pattern size, iteration, time taken\n";
124124

125-
void StoreTimeTaken(size_t sampleSize, size_t patternLength, size_t sampleIteration) {
125+
void StoreTimeTaken(long long sampleSize, long long patternLength, long long sampleIteration) {
126126
// sample size out
127127
std::string output = std::to_string(sampleSize);
128128
// pattern length out
@@ -153,32 +153,32 @@ void WriteTimeTaken(std::string algo) {
153153
f.close();
154154
}
155155

156-
std::vector<size_t> Search_BoyerMoore(const std::string& text, const std::string& pattern) {
156+
std::vector<long long> Search_BoyerMoore(const std::string& text, const std::string& pattern) {
157157
StartClock();
158-
size_t textLength = text.length();
159-
size_t patternLength = pattern.length();
158+
long long textLength = text.length();
159+
long long patternLength = pattern.length();
160160

161-
std::vector<size_t> matchingIndexes;
161+
std::vector<long long> matchingIndexes;
162162

163163
// lookup table to store how many places the given position should skip
164164
// ascii extended codes
165-
size_t skip[256] = {};
165+
long long skip[256] = {};
166166

167167
// set all points to be max skip value
168-
for (size_t i = 0; i < 256; ++i)
168+
for (long long i = 0; i < 256; ++i)
169169
// Not in the pattern.
170170
skip[i] = patternLength;
171171

172172
// for each of the characters in the pattern
173-
for (size_t i = 0; i < patternLength; ++i)
173+
for (long long i = 0; i < patternLength; ++i)
174174
// set that character to its length from the end of the pattern
175175
skip[pattern[i]] = (patternLength - 1) - i;
176176

177177
// iterate through all the text, stopping patternLength positions from the end of the text
178-
for (size_t i = 0; i < textLength - patternLength; ++i) {
178+
for (long long i = 0; i < textLength - patternLength; ++i) {
179179
// check if the last character in the pattern is a match
180-
size_t pos = i + patternLength - 1;
181-
size_t distance = skip[text[pos]];
180+
long long pos = i + patternLength - 1;
181+
long long distance = skip[text[pos]];
182182

183183
// if no match, skip by distance to the next position
184184
if (distance != 0) {
@@ -188,12 +188,12 @@ std::vector<size_t> Search_BoyerMoore(const std::string& text, const std::string
188188

189189
// there is a match
190190

191-
size_t j;
191+
long long j;
192192

193193
// iterate through the text to check each character
194194
for (j = 0; j < patternLength; j++) {
195195
// if the current char in text being checked doesn't match that point in the pattern
196-
size_t pos = i + j;
196+
long long pos = i + j;
197197
if (text[pos] != pattern[j]) break; // break and move on
198198
}
199199

@@ -216,7 +216,7 @@ bool ShowMatchingIndexes() {
216216
return true;
217217
}
218218

219-
void ShowMatches(std::vector<size_t> matchingIndexes, std::string& text, std::string& pattern) {
219+
void ShowMatches(std::vector<long long> matchingIndexes, std::string& text, std::string& pattern) {
220220
ShowTimeTaken();
221221
if (matchingIndexes.empty()) std::cout << pattern << " is not in the given text!\n";
222222
else {
@@ -228,7 +228,7 @@ void ShowMatches(std::vector<size_t> matchingIndexes, std::string& text, std::st
228228

229229
std::string output = "";
230230

231-
for (size_t i = 0; i < matchingIndexes.size(); ++i)
231+
for (long long i = 0; i < matchingIndexes.size(); ++i)
232232
output.append(get_context(text, matchingIndexes[i], (i + 1)));
233233

234234
std::cout << output;
@@ -249,69 +249,68 @@ void BoyerMoore() {
249249
Clear(MessageType::BoyerMoore);
250250
std::cout << "Searching for: " << pattern;
251251

252-
std::vector<size_t> matchingIndexes = Search_BoyerMoore(text, pattern);
252+
std::vector<long long> matchingIndexes = Search_BoyerMoore(text, pattern);
253253
ShowMatches(matchingIndexes, text, pattern);
254254

255255
EndOfAlgorithm();
256256
}
257257

/// Computes the multiplier used to remove the leading character of a window
/// when rolling a Rabin-Karp hash.
///
/// Starting from hashVal, multiplies by alphabet and reduces modulo prime
/// (patternLength - 1) times, so the result is congruent to
/// hashVal * alphabet^(patternLength - 1) modulo prime.
///
/// @param patternLength number of characters in the pattern
/// @param hashVal       starting value (callers in this file pass 1)
/// @param alphabet      radix of the hash
/// @param prime         modulus of the hash; must be non-zero
/// @return the reduced multiplier; hashVal is returned unchanged when
///         patternLength is 1 or less
long long GetHashValue(long long patternLength, long long hashVal, long long alphabet, long long prime) {
    // Signed arithmetic matters here: with an unsigned length an empty
    // pattern would make (patternLength - 1) wrap to a huge iteration count.
    for (long long step = 1; step < patternLength; ++step)
        hashVal = (hashVal * alphabet) % prime;
    return hashVal;
}
263263

/// Computes the polynomial hash of the first patternLength characters of text.
///
/// Each step folds one character into the running value:
///   textHash = (alphabet * textHash + text[i]) mod prime
///
/// @param text          string whose leading characters are hashed; taken by
///                      const reference so the (possibly very large) search
///                      text is not copied on every call
/// @param textHash      starting hash value (callers in this file pass 0)
/// @param patternLength number of leading characters to hash
/// @param alphabet      radix of the hash
/// @param prime         modulus of the hash; must be positive
/// @return the hash; lies in [0, prime) once at least one character is hashed
long long HashText(const std::string& text, long long textHash, long long patternLength, long long alphabet, long long prime) {
    // Never read past the end when the text is shorter than the pattern.
    const long long available = static_cast<long long>(text.size());
    const long long count = patternLength < available ? patternLength : available;

    for (long long i = 0; i < count; ++i) {
        textHash = (alphabet * textHash + text[static_cast<std::string::size_type>(i)]) % prime;
        // Where char is signed, characters above 127 are negative and '%'
        // keeps the sign of the dividend. Normalise so the value can be
        // compared for equality with hashes that were made non-negative.
        if (textHash < 0)
            textHash += prime;
    }
    return textHash;
}
270269

/// Slides a Rabin-Karp window one character to the right and returns the
/// hash of the new window.
///
/// Steps:
///   1. remove the contribution of the outgoing character text[i], which is
///      text[i] * hashVal
///   2. multiply the remainder by alphabet to shift it up one position
///   3. add the incoming character text[i + patternLength]
///   4. take the remainder modulo prime, then add prime if it is negative
///
/// @param text          the text being searched
/// @param textHashVal   hash of the window starting at index i
/// @param hashVal       multiplier for the leading character of a window
/// @param i             start index of the current window; must be a valid
///                      index into text
/// @param patternLength window length
/// @param alphabet      radix of the hash
/// @param prime         modulus of the hash; must be positive
/// @return hash of the window starting at i + 1, in [0, prime)
long long RollHash(const std::string& text, long long textHashVal, long long hashVal, long long i, long long patternLength, long long alphabet, long long prime) {
    const long long pos = i + patternLength;
    const long long outgoing = text[static_cast<std::string::size_type>(i)];

    // Sliding the final window asks for the character one past the end.
    // std::string yields '\0' at index size(); use 0 for any index at or
    // beyond the end so the read is never out of bounds.
    const long long incoming = (pos >= 0 && pos < static_cast<long long>(text.size()))
        ? text[static_cast<std::string::size_type>(pos)]
        : 0;

    textHashVal = (alphabet * (textHashVal - outgoing * hashVal) + incoming) % prime;

    // The subtraction can make the remainder negative; bring it back into
    // [0, prime).
    if (textHashVal < 0)
        textHashVal += prime;

    return textHashVal;
}
289288

290-
std::vector<size_t> Search_RabinKarp(std::string& text, std::string& pattern) {
289+
std::vector<long long> Search_RabinKarp(std::string& text, std::string& pattern) {
291290
StartClock();
292291
// vector to hold returnable data
293-
std::vector<size_t> matchingIndexes;
292+
std::vector<long long> matchingIndexes;
294293
// Get lengths
295-
size_t patternLength = pattern.size();
296-
size_t textLength = text.size();
294+
long long patternLength = pattern.size();
295+
long long textLength = text.size();
297296

298297
// Count of possible chars in input
299-
const size_t alphabet = 256;
298+
const long long alphabet = 256;
300299
// Hash value of the pattern
301-
size_t patternHashVal = 0;
300+
long long patternHashVal = 0;
302301
// Hash value of the text
303-
size_t textHashVal = 0;
302+
long long textHashVal = 0;
304303
// prime number used to calculate hash
305-
const size_t prime = 17;
304+
const long long prime = 17;
306305

307306
// Calculate the hash value
308307
// initialise
309-
size_t hashVal = 1;
308+
long long hashVal = 1;
310309
// get hash value
311310
hashVal = GetHashValue(patternLength, hashVal, alphabet, prime);
312311

313312
// Keep iterators in scope
314-
size_t i, j;
313+
long long i = 0, j = 0;
315314

316315
// Get hash values of the pattern and text
317316
patternHashVal = HashText(pattern, patternHashVal, pattern.size(), alphabet, prime);
@@ -326,7 +325,7 @@ std::vector<size_t> Search_RabinKarp(std::string& text, std::string& pattern) {
326325
for (j = 0; j < patternLength; j++) {
327326
// check each char
328327

329-
size_t pos = i + j;
328+
long long pos = i + j;
330329
if (text[pos] != pattern[j])
331330
// break if mismatch
332331
break;
@@ -356,7 +355,7 @@ void RabinKarp() {
356355
Clear(MessageType::RabinKarp);
357356
std::cout << "Searching for: " << pattern;
358357

359-
std::vector<size_t> matchingIndexs = Search_RabinKarp(text, pattern);
358+
std::vector<long long> matchingIndexs = Search_RabinKarp(text, pattern);
360359

361360
ShowMatches(matchingIndexs, text, pattern);
362361

@@ -366,7 +365,7 @@ void RabinKarp() {
366365
void benchmarkRK() {
367366
// iterate text sample sizes
368367
// 2^0 to 2^13 sets of sample text
369-
for (size_t fullLoop = 1; fullLoop <= 8192; fullLoop *= 2)
368+
for (long long fullLoop = 1; fullLoop <= 8192; fullLoop *= 2)
370369
{
371370
// file to load
372371
std::string file = "search.txt";
@@ -375,7 +374,7 @@ void benchmarkRK() {
375374
// read once
376375
load_file(file, text);
377376
// countdown iterator for loop reading
378-
size_t j = fullLoop;
377+
long long j = fullLoop;
379378
// if more than 1 set is to be loaded
380379
// loop until 1 is reached
381380
while (j > 1) {
@@ -390,35 +389,35 @@ void benchmarkRK() {
390389
* Large: consectetur
391390
*/
392391
std::string patterns[] = { "nec","dolor","consectetur" };
393-
for (size_t patternLoop = 0; patternLoop < 3; patternLoop++)
392+
for (long long patternLoop = 0; patternLoop < 3; patternLoop++)
394393
{
395394
std::string pattern = patterns[patternLoop];
396-
for (size_t algoLoop = 0; algoLoop < 100; algoLoop++) {
395+
for (long long algoLoop = 0; algoLoop < 100; algoLoop++) {
397396
std::cout << "Sample Size: " << fullLoop << " | Pattern: " << pattern << " | Iteration: " << algoLoop << " | Time taken: " << std::chrono::duration_cast<std::chrono::microseconds>(timerEnd - timerStart).count() << std::endl;
398397
StartClock();
399398
// vector to hold returnable data
400-
std::vector<size_t> matchingIndexes;
399+
std::vector<long long> matchingIndexes;
401400
// Get lengths
402-
size_t patternLength = pattern.size();
403-
size_t textLength = text.size();
401+
long long patternLength = pattern.size();
402+
long long textLength = text.size();
404403

405404
// Count of possible chars in input
406-
const size_t alphabet = 256;
405+
const long long alphabet = 256;
407406
// Hash value of the pattern
408-
size_t patternHashVal = 0;
407+
long long patternHashVal = 0;
409408
// Hash value of the text
410-
size_t textHashVal = 0;
409+
long long textHashVal = 0;
411410
// prime number used to calculate hash
412-
const size_t prime = 17;
411+
const long long prime = 17;
413412

414413
// Calculate the hash value
415414
// initialise
416-
size_t hashVal = 1;
415+
long long hashVal = 1;
417416
// get hash value
418417
hashVal = GetHashValue(patternLength, hashVal, alphabet, prime);
419418

420419
// Keep iterators in scope
421-
size_t i, j;
420+
long long i, j;
422421

423422
// Get hash values of the pattern and text
424423
patternHashVal = HashText(pattern, patternHashVal, pattern.size(), alphabet, prime);
@@ -433,7 +432,7 @@ void benchmarkRK() {
433432
for (j = 0; j < patternLength; j++) {
434433
// check each char
435434

436-
size_t pos = i + j;
435+
long long pos = i + j;
437436
if (text[pos] != pattern[j])
438437
// break if mismatch
439438
break;
@@ -461,7 +460,7 @@ void benchmarkRK() {
461460
void benchmarkBM() {
462461
// iterate text sample sizes
463462
// 2^0 to 2^13 sets of sample text
464-
for (size_t fullLoop = 1; fullLoop <= 8192; fullLoop *= 2)
463+
for (long long fullLoop = 1; fullLoop <= 8192; fullLoop *= 2)
465464
{
466465
// file to load
467466
std::string file = "search.txt";
@@ -470,7 +469,7 @@ void benchmarkBM() {
470469
// read once
471470
load_file(file, text);
472471
// countdown iterator for loop reading
473-
size_t j = fullLoop;
472+
long long j = fullLoop;
474473
// if more than 1 set is to be loaded
475474
// loop until 1 is reached
476475
while (j > 1) {
@@ -485,37 +484,37 @@ void benchmarkBM() {
485484
* Large: consectetur
486485
*/
487486
std::string patterns[] = { "nec","dolor","consectetur" };
488-
for (size_t patternLoop = 0; patternLoop < 3; patternLoop++)
487+
for (long long patternLoop = 0; patternLoop < 3; patternLoop++)
489488
{
490489
std::string pattern = patterns[patternLoop];
491-
for (size_t algoLoop = 0; algoLoop < 100; algoLoop++)
490+
for (long long algoLoop = 0; algoLoop < 100; algoLoop++)
492491
{
493492
std::cout << "Sample Size: " << fullLoop << " | Pattern: " << pattern << " | Iteration: " << algoLoop << " | Time taken: " << std::chrono::duration_cast<std::chrono::microseconds>(timerEnd - timerStart).count() << std::endl;
494493
StartClock();
495-
size_t textLength = text.length();
496-
size_t patternLength = pattern.length();
494+
long long textLength = text.length();
495+
long long patternLength = pattern.length();
497496

498-
std::vector<size_t> matchingIndexes;
497+
std::vector<long long> matchingIndexes;
499498

500499
// lookup table to store how many places the given position should skip
501500
// ascii extended codes
502-
size_t skip[256] = {};
501+
long long skip[256] = {};
503502

504503
// set all points to be max skip value
505-
for (size_t i = 0; i < 256; ++i)
504+
for (long long i = 0; i < 256; ++i)
506505
// Not in the pattern.
507506
skip[i] = patternLength;
508507

509508
// for each of the characters in the pattern
510-
for (size_t i = 0; i < patternLength; ++i)
509+
for (long long i = 0; i < patternLength; ++i)
511510
// set that character to its length from the end of the pattern
512-
skip[size_t(pattern[i])] = (patternLength - 1) - i;
511+
skip[long long(pattern[i])] = (patternLength - 1) - i;
513512

514513
// iterate through all the text, stopping patternLength positions from the end of the text
515-
for (size_t i = 0; i < textLength - patternLength; ++i) {
514+
for (long long i = 0; i < textLength - patternLength; ++i) {
516515
// check if the last character in the pattern is a match
517-
size_t pos = i + patternLength - 1;
518-
size_t distance = skip[size_t(text[pos])];
516+
long long pos = i + patternLength - 1;
517+
long long distance = skip[long long(text[pos])];
519518

520519
// if no match, skip by distance to the next position
521520
if (distance != 0) {
@@ -525,12 +524,12 @@ void benchmarkBM() {
525524

526525
// there is a match
527526

528-
size_t j;
527+
long long j;
529528

530529
// iterate through the text to check each character
531530
for (j = 0; j < patternLength; j++) {
532531
// if the current char in text being checked doesn't match that point in the pattern
533-
size_t pos = i + j;
532+
long long pos = i + j;
534533
if (text[pos] != pattern[j]) break; // break and move on
535534
}
536535

0 commit comments

Comments
 (0)