Add doctests for the boyer_moore_search algorithm. (#12769)

* Add doctests for the boyer_moore_search algorithm.

* Update boyer_moore_search.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update boyer_moore_search.py

* Update boyer_moore_search.py

---------

Co-authored-by: Maxim Smolskiy <mithridatus@mail.ru>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Mindaugas
2025-06-02 20:32:47 +03:00
committed by GitHub
parent 4b077c06b6
commit c3d4b9e54d

View File

@ -11,23 +11,31 @@ If the mismatched character does not occur to the left in Pattern,
a shift is proposed that moves the entirety of Pattern past a shift is proposed that moves the entirety of Pattern past
the point of mismatch in the text. the point of mismatch in the text.
If there no mismatch then the pattern matches with text block. If there is no mismatch then the pattern matches with text block.
Time Complexity : O(n/m) Time Complexity : O(n/m)
n=length of main string n=length of main string
m=length of pattern string m=length of pattern string
""" """
from __future__ import annotations
class BoyerMooreSearch: class BoyerMooreSearch:
"""
Example usage:
bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
positions = bms.bad_character_heuristic()
where 'positions' contains the locations where the pattern was matched.
"""
def __init__(self, text: str, pattern: str): def __init__(self, text: str, pattern: str):
self.text, self.pattern = text, pattern self.text, self.pattern = text, pattern
self.textLen, self.patLen = len(text), len(pattern) self.textLen, self.patLen = len(text), len(pattern)
def match_in_pattern(self, char: str) -> int: def match_in_pattern(self, char: str) -> int:
"""finds the index of char in pattern in reverse order """
Finds the index of char in pattern in reverse order.
Parameters : Parameters :
char (chr): character to be searched char (chr): character to be searched
@ -35,6 +43,10 @@ class BoyerMooreSearch:
Returns : Returns :
i (int): index of char from last in pattern i (int): index of char from last in pattern
-1 (int): if char is not found in pattern -1 (int): if char is not found in pattern
>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.match_in_pattern("B")
1
""" """
for i in range(self.patLen - 1, -1, -1): for i in range(self.patLen - 1, -1, -1):
@ -44,8 +56,8 @@ class BoyerMooreSearch:
def mismatch_in_text(self, current_pos: int) -> int: def mismatch_in_text(self, current_pos: int) -> int:
""" """
find the index of mis-matched character in text when compared with pattern Find the index of mis-matched character in text when compared with pattern
from last from last.
Parameters : Parameters :
current_pos (int): current index position of text current_pos (int): current index position of text
@ -53,6 +65,10 @@ class BoyerMooreSearch:
Returns : Returns :
i (int): index of mismatched char from last in text i (int): index of mismatched char from last in text
-1 (int): if there is no mismatch between pattern and text block -1 (int): if there is no mismatch between pattern and text block
>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.mismatch_in_text(2)
3
""" """
for i in range(self.patLen - 1, -1, -1): for i in range(self.patLen - 1, -1, -1):
@ -61,7 +77,14 @@ class BoyerMooreSearch:
return -1 return -1
def bad_character_heuristic(self) -> list[int]: def bad_character_heuristic(self) -> list[int]:
# searches pattern in text and returns index positions """
Finds the positions where the pattern occurs in the text.
>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.bad_character_heuristic()
[0, 3]
"""
positions = [] positions = []
for i in range(self.textLen - self.patLen + 1): for i in range(self.textLen - self.patLen + 1):
mismatch_index = self.mismatch_in_text(i) mismatch_index = self.mismatch_in_text(i)
@ -75,13 +98,7 @@ class BoyerMooreSearch:
return positions return positions
text = "ABAABA" if __name__ == "__main__":
pattern = "AB" import doctest
bms = BoyerMooreSearch(text, pattern)
positions = bms.bad_character_heuristic()
if len(positions) == 0: doctest.testmod()
print("No match found")
else:
print("Pattern found in following positions: ")
print(positions)