python Boyer-Moore字符串搜索算法:Python实现
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了“Python Boyer-Moore 字符串搜索算法:Python 实现”相关的知识,希望对你有一定的参考价值。
# coding = utf-8
from copy import deepcopy
def generate_bad_char(string: str) -> dict:
    """Build the bad-character lookup table for a pattern.

    For every index ``i`` of ``string`` the table stores a dict that maps
    each character occurring in ``string[:i]`` to the index of its rightmost
    occurrence within that prefix.

    Args:
        string: The pattern to preprocess.

    Returns:
        A ``{position: {char: last_index}}`` dict with one entry per index
        of ``string``. An empty pattern yields ``{}``.
    """
    bad_char_dic = {}
    last_seen = {}
    for i, char in enumerate(string):
        # Snapshot before recording ``char`` so entry ``i`` only describes
        # string[:i]. The values are plain ints, so a shallow copy suffices.
        bad_char_dic[i] = dict(last_seen)
        last_seen[char] = i
    return bad_char_dic
def generate_prefix(string: str) -> {str: int}:
    """Map each non-empty proper prefix of ``string`` to its last index.

    The full string itself is not included, so inputs of length 0 or 1
    produce an empty dict.
    """
    return {string[:end]: end - 1 for end in range(1, len(string))}
def generate_good_suffix(string: str) -> list:
    """Build the good-suffix table for a pattern.

    Entry ``k`` (for ``1 <= k < len(string)``) describes where the matched
    suffix of length ``k`` can be re-aligned inside the pattern:

    * if that suffix occurs again somewhere in ``string[:-1]``, the entry is
      the index of the last character of its rightmost such occurrence;
    * otherwise it is the last index of the longest shorter suffix that is
      also a prefix of ``string``;
    * ``-1`` when neither exists.

    Entry ``0`` is always ``-1``.

    Args:
        string: The pattern to preprocess.

    Returns:
        A list of ``max(len(string), 1)`` integers as described above.
    """
    length = len(string)
    # Loop-invariant: a re-occurrence must end before the final character,
    # otherwise the suffix would simply match itself.
    without_last = string[:-1]
    good_suffix_dic = [-1]
    for i in range(length - 1, 0, -1):
        current_suffix = string[i:]
        perfect_match = without_last.rfind(current_suffix)
        if perfect_match != -1:
            good_suffix_dic.append(perfect_match + (length - i) - 1)
            continue
        # No full re-occurrence: fall back to the longest proper suffix of
        # the matched part that is also a prefix of the pattern. Trying
        # j = 1, 2, ... visits candidates from longest to shortest.
        sub_suffix_last_seen = -1
        suffix_length = len(current_suffix)
        for j in range(1, suffix_length):
            if string.startswith(current_suffix[j:]):
                sub_suffix_last_seen = suffix_length - j - 1
                break
        good_suffix_dic.append(sub_suffix_last_seen)
    return good_suffix_dic
def search_for_keyword(haystack: str, needle: str) -> int:
    """Find the first occurrence of ``needle`` in ``haystack`` (Boyer-Moore).

    Both arguments are normalised to text first: a list is converted by
    joining ``str()`` of its items, anything else by calling ``str()`` on it.

    Args:
        haystack: Text (or list / other value) to search in.
        needle: Text (or list / other value) to search for.

    Returns:
        The index in the normalised haystack where the first match starts,
        ``0`` for an empty needle, or ``-1`` when there is no match or when
        either argument is ``None``.
    """
    if haystack is None or needle is None:
        return -1

    if isinstance(haystack, list):
        haystack = ''.join(map(str, haystack))
    else:
        haystack = str(haystack)
    if isinstance(needle, list):
        needle = ''.join(map(str, needle))
    else:
        needle = str(needle)

    needle_length, haystack_length = len(needle), len(haystack)
    if needle_length > haystack_length:
        return -1
    if needle_length == haystack_length:
        return 0 if haystack == needle else -1
    if needle_length == 0:
        return 0
    if needle_length == 1:
        return haystack.find(needle)

    # Build the shift tables only once the trivial cases are ruled out, so
    # those early returns do not pay for preprocessing.
    bad_char_dict = generate_bad_char(needle)
    good_suffix_dict = generate_good_suffix(needle)

    # haystack_pointer starts at the haystack index aligned with the last
    # needle character and walks left while characters keep matching.
    haystack_pointer = needle_length - 1
    while haystack_pointer < haystack_length:
        good_suffix_length = 0  # characters matched so far in this window
        for needle_pointer in range(needle_length - 1, -1, -1):
            current_char = haystack[haystack_pointer]
            if current_char == needle[needle_pointer]:
                haystack_pointer -= 1
                good_suffix_length += 1
                continue

            # Bad-character rule: align the mismatching haystack character
            # with its rightmost occurrence left of needle_pointer, or move
            # past it entirely when it does not occur there (-1).
            bad_char_last_seen = bad_char_dict[needle_pointer].get(current_char, -1)
            bad_char_shift = needle_pointer - bad_char_last_seen

            # Good-suffix rule: only applies once something has matched.
            good_suffix_shift = 0
            if good_suffix_length > 0:
                good_suffix_position = good_suffix_dict[good_suffix_length]
                good_suffix_shift = needle_length - 1 - good_suffix_position

            shift = max(bad_char_shift, good_suffix_shift)
            # Adding good_suffix_length undoes the leftward walk, putting
            # the pointer back on the window's last character before the
            # window is shifted.
            haystack_pointer += shift + good_suffix_length
            break
        else:
            # Every needle character matched; the pointer now sits one
            # position before the start of the match.
            return haystack_pointer + 1
    return -1
if __name__ == '__main__':
    # Demo run: print where the sample needle first occurs in the sample haystack.
    sample_haystack = '671111091113298117115999711432117110973297103117106973210111032117110321129710697114'
    sample_needle = '111109111'
    print(search_for_keyword(sample_haystack, sample_needle))
以上是关于“Python Boyer-Moore 字符串搜索算法:Python 实现”的主要内容,如果未能解决你的问题,请参考以下文章:
哪个是更好的字符串搜索算法? Boyer-Moore 还是 Boyer Moore Horspool? [关闭]
哪个是更好的字符串搜索算法? Boyer-Moore 还是 Boyer Moore Horspool? [关闭]
是否有一个Boyer-Moore字符串搜索和快速搜索和替换功能以及Delphi 2010 String(UnicodeString)的快速字符串计数?