Boyer-Moore algorithm for substring search

Theory

Implementation

#include <bits/stdc++.h>
using namespace std;
// Reference: https://algs4.cs.princeton.edu/53substring/

class BoyerMoore {
private:
    int const R = 256;  // The radix
    vector<int> right;  // Bad-character skip array
    string pattern;     // The pattern
public:
    BoyerMoore(string pattern) {
        this->pattern = pattern;
        int m = pattern.size();
        // Compute rightmost occurrence of each character in pattern
        right = vector<int>(R, -1);
        for (int j = 0; j < m; j++) {
            right[pattern[j]] = j;
        }
    }

    int search(string const& text) {
        int n = text.size();
        int m = pattern.size();
        int skip;
        for (int i = 0; i + m <= n; i += skip) {
            skip = 0;
            for (int j = m - 1; j >= 0; j--) {
                if (text[i+j] == pattern[j]) { continue; }

                // Because right[c] = -1 if c does not exist in the pattern,
                // this step covers two cases:
                // 1. Align the pattern with the rightmost occurrence of c
                // 2. Shift the pattern past the mismatch (i.e., skip to j + 1)
                skip = j - right[text[i+j]];

                // If the rightmost occurrence of text[i+j] is to the right
                // of pattern, shift one position to the right.
                skip = max(skip, 1);
                break;
            }
            if (skip == 0) { return i; }
        }
        return -1;
    }
};

struct TestCase {
    string text, pattern;
};

int main() {
    vector<TestCase> testCases{
        {"mississippi", "issip"},
        {"mississippi", "ppi"},
        {"mississippi", "sip"},
        {"mississippi", "abc"},
    };

    for (auto tc : testCases) {
        cout << "Text = '" << tc.text << "' Pattern = '" << tc.pattern << "'\n";
        BoyerMoore bm(tc.pattern);
        int idx = bm.search(tc.text);
        if (idx == -1) {
            cout << "'" << tc.pattern << "' not found in '" << tc.text << "'\n";
        } else {
            cout << tc.text << "\n";
            cout << string(idx, ' ') << string(tc.pattern.size(), '^') << "\n";
        }
        cout << "\n";
    }
}

Practice problems


by

Tags: