Theory
Implementation
#include <bits/stdc++.h>
using namespace std;
// Reference: https://algs4.cs.princeton.edu/53substring/
class BoyerMoore {
private:
int const R = 256; // The radix
vector<int> right; // Bad-character skip array
string pattern; // The pattern
public:
BoyerMoore(string pattern) {
this->pattern = pattern;
int m = pattern.size();
// Compute rightmost occurrence of each character in pattern
right = vector<int>(R, -1);
for (int j = 0; j < m; j++) {
right[pattern[j]] = j;
}
}
int search(string const& text) {
int n = text.size();
int m = pattern.size();
int skip;
for (int i = 0; i + m <= n; i += skip) {
skip = 0;
for (int j = m - 1; j >= 0; j--) {
if (text[i+j] == pattern[j]) { continue; }
// Because right[c] = -1 if c does not exist in the pattern,
// this step covers two cases:
// 1. Align the pattern with the rightmost occurrence of c
// 2. Shift the pattern past the mismatch (i.e., skip to j + 1)
skip = j - right[text[i+j]];
// If the rightmost occurrence of text[i+j] is to the right
// of pattern, shift one position to the right.
skip = max(skip, 1);
break;
}
if (skip == 0) { return i; }
}
return -1;
}
};
struct TestCase {
string text, pattern;
};
int main() {
vector<TestCase> testCases{
{"mississippi", "issip"},
{"mississippi", "ppi"},
{"mississippi", "sip"},
{"mississippi", "abc"},
};
for (auto tc : testCases) {
cout << "Text = '" << tc.text << "' Pattern = '" << tc.pattern << "'\n";
BoyerMoore bm(tc.pattern);
int idx = bm.search(tc.text);
if (idx == -1) {
cout << "'" << tc.pattern << "' not found in '" << tc.text << "'\n";
} else {
cout << tc.text << "\n";
cout << string(idx, ' ') << string(tc.pattern.size(), '^') << "\n";
}
cout << "\n";
}
}
Practice problems