mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-07-04 16:57:32 +08:00
Move files to strings folder (#4283)
* Move files to strings-folder * moved the file "words" back to the original folder * moved "anagram.py" also back * fix the codespell ignore-list
This commit is contained in:
64
strings/autocomplete_using_trie.py
Normal file
64
strings/autocomplete_using_trie.py
Normal file
@ -0,0 +1,64 @@
|
||||
END = "#"  # Sentinel key marking that a complete word ends at this node.


class Trie:
    """Prefix tree stored as nested dicts keyed by single characters.

    Every node is a dict mapping a character to its child node.  A node at
    which an inserted word ends additionally maps ``END`` to ``True``.
    """

    def __init__(self):
        """Create an empty trie."""
        self._trie = {}

    def insert_word(self, text):
        """Add ``text`` to the trie, creating any missing nodes on the way."""
        node = self._trie
        for char in text:
            if char not in node:
                node[char] = {}
            node = node[char]
        node[END] = True

    def find_word(self, prefix):
        """Return the stored continuations of ``prefix``.

        Each continuation is the remainder of an inserted word followed by a
        single space.  A prefix that is not present yields an empty list;
        otherwise the continuations are returned as a tuple.
        """
        node = self._trie
        for char in prefix:
            if char not in node:
                return []
            node = node[char]
        return self._elements(node)

    def _elements(self, d):
        """Return a tuple of every suffix reachable from the node ``d``.

        The ``END`` marker contributes a single space, so each suffix ends
        with " ".
        """
        suffixes = []
        for key, child in d.items():
            if key == END:
                suffixes.append(" ")
            else:
                suffixes.extend(key + tail for tail in self._elements(child))
        return tuple(suffixes)
|
||||
|
||||
|
||||
# Build the module-level trie that autocomplete_using_trie() searches.
words = ("depart", "detergent", "daring", "dog", "deer", "deal")
trie = Trie()
for word in words:
    trie.insert_word(word)
|
||||
|
||||
|
||||
def autocomplete_using_trie(s):
    """
    Return a tuple of the words in the module-level ``trie`` that start with ``s``.

    Each completion is ``s`` joined with one suffix returned by
    ``trie.find_word(s)``; those suffixes end with a single space, so every
    completion does too.  An unknown prefix gives an empty tuple.

    >>> trie = Trie()
    >>> for word in words:
    ...     trie.insert_word(word)
    ...
    >>> matches = autocomplete_using_trie("de")
    >>> "detergent " in matches
    True
    >>> "dog " in matches
    False
    """
    suffixes = trie.find_word(s)
    return tuple(s + w for w in suffixes)
|
||||
|
||||
|
||||
def main():
    """Print the completions of the prefix "de"."""
    completions = autocomplete_using_trie("de")
    print(completions)
|
||||
|
||||
|
||||
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
61
strings/detecting_english_programmatically.py
Normal file
61
strings/detecting_english_programmatically.py
Normal file
@ -0,0 +1,61 @@
|
||||
import os
|
||||
|
||||
# The 26 uppercase ASCII letters.
UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Characters kept by removeNonLetters(): both letter cases plus space, tab
# and newline.
LETTERS_AND_SPACE = "".join((UPPERLETTERS, UPPERLETTERS.lower(), " \t\n"))
|
||||
|
||||
|
||||
def loadDictionary():
    """Read dictionary.txt from this module's directory into a dict.

    Returns:
        A dict whose keys are the newline-separated entries of the file.
        Every value is None; only the keys carry information.
    """
    moduleDir, _ = os.path.split(os.path.realpath(__file__))
    with open(moduleDir + "/dictionary.txt") as dictionaryFile:
        entries = dictionaryFile.read().split("\n")
    return dict.fromkeys(entries)
|
||||
|
||||
|
||||
# Word list loaded once when the module is imported; getEnglishCount() tests
# membership against it.
ENGLISH_WORDS = loadDictionary()
|
||||
|
||||
|
||||
def getEnglishCount(message):
    """Return the fraction (0.0 to 1.0) of words in ``message`` found in ENGLISH_WORDS.

    The message is upper-cased, stripped of characters outside
    LETTERS_AND_SPACE, and split on whitespace.  A message that yields no
    words scores 0.0.
    """
    candidates = removeNonLetters(message.upper()).split()
    if not candidates:
        return 0.0

    hits = sum(1 for word in candidates if word in ENGLISH_WORDS)
    return float(hits) / len(candidates)
|
||||
|
||||
|
||||
def removeNonLetters(message):
    """Return ``message`` with every character not in LETTERS_AND_SPACE dropped."""
    return "".join(symbol for symbol in message if symbol in LETTERS_AND_SPACE)
|
||||
|
||||
|
||||
def isEnglish(message, wordPercentage=20, letterPercentage=85):
    """
    Decide whether ``message`` looks like English text.

    Args:
        message: Text to classify.
        wordPercentage: Minimum percentage of the message's words that must
            appear in the dictionary.
        letterPercentage: Minimum percentage of the message's characters that
            must be letters, spaces, tabs or newlines.

    Returns:
        True when both thresholds are met, otherwise False.  An empty
        message is never considered English.

    >>> isEnglish('Hello World')
    True

    >>> isEnglish('llold HorWd')
    False

    >>> isEnglish('')
    False
    """
    if not message:
        # Nothing to measure; also avoids dividing by len(message) == 0 below.
        return False

    wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
    numLetters = len(removeNonLetters(message))
    messageLettersPercentage = (float(numLetters) / len(message)) * 100
    lettersMatch = messageLettersPercentage >= letterPercentage
    return wordsMatch and lettersMatch
|
||||
|
||||
|
||||
# Run this module's doctests when it is executed as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
|
45334
strings/dictionary.txt
Normal file
45334
strings/dictionary.txt
Normal file
File diff suppressed because it is too large
Load Diff
120
strings/frequency_finder.py
Normal file
120
strings/frequency_finder.py
Normal file
@ -0,0 +1,120 @@
|
||||
# Frequency Finder
|
||||
|
||||
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
|
||||
# Letter -> frequency value, listed from most to least frequent.
englishLetterFreq = {
    "E": 12.70,
    "T": 9.06,
    "A": 8.17,
    "O": 7.51,
    "I": 6.97,
    "N": 6.75,
    "S": 6.33,
    "H": 6.09,
    "R": 5.99,
    "D": 4.25,
    "L": 4.03,
    "C": 2.78,
    "U": 2.76,
    "M": 2.41,
    "W": 2.36,
    "F": 2.23,
    "G": 2.02,
    "Y": 1.97,
    "P": 1.93,
    "B": 1.29,
    "V": 0.98,
    "K": 0.77,
    "J": 0.15,
    "X": 0.15,
    "Q": 0.10,
    "Z": 0.07,
}
# The same letters as a string, in the order they are listed above.
ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
# The alphabet in A-Z order.
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
|
||||
|
||||
def getLetterCount(message):
    """Count how often each letter of LETTERS occurs in ``message``.

    The message is upper-cased first, so the count ignores case.  Characters
    outside LETTERS are skipped.

    Returns:
        A dict with one key per letter "A".."Z" (in that order) mapped to its
        occurrence count; letters that never occur map to 0.
    """
    letterCount = dict.fromkeys(LETTERS, 0)
    for letter in message.upper():
        if letter in LETTERS:
            letterCount[letter] += 1
    return letterCount
|
||||
|
||||
|
||||
def getItemAtIndexZero(x):
    """Return ``x[0]``; used as a sort key for (count, letters) pairs."""
    return x[0]
|
||||
|
||||
|
||||
def getFrequencyOrder(message):
    """Return the 26 letters ordered from most to least frequent in ``message``.

    Letters that share a count are placed in reverse ETAOIN order.
    """
    letterToFreq = getLetterCount(message)

    # Group letters by how many times they occur.
    lettersByCount = {}
    for letter in LETTERS:
        lettersByCount.setdefault(letterToFreq[letter], []).append(letter)

    # Within each group, order by reverse ETAOIN position and join.
    groupByCount = {
        count: "".join(sorted(group, key=ETAOIN.find, reverse=True))
        for count, group in lettersByCount.items()
    }

    # Highest count first.
    ranked = sorted(groupByCount.items(), key=getItemAtIndexZero, reverse=True)
    return "".join(letters for _, letters in ranked)
|
||||
|
||||
|
||||
def englishFreqMatchScore(message):
    """
    Score how closely the letter frequencies of ``message`` match ETAOIN.

    One point is given for each of the first six ETAOIN letters that is among
    the message's six most frequent letters, and one for each of the last six
    ETAOIN letters that is among its six least frequent, so the score ranges
    from 0 to 12.

    >>> englishFreqMatchScore('Hello World')
    1
    """
    freqOrder = getFrequencyOrder(message)
    mostFrequent, leastFrequent = freqOrder[:6], freqOrder[-6:]

    matchScore = sum(1 for letter in ETAOIN[:6] if letter in mostFrequent)
    matchScore += sum(1 for letter in ETAOIN[-6:] if letter in leastFrequent)
    return matchScore
|
||||
|
||||
|
||||
# Run this module's doctests when it is executed as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
|
66
strings/palindrome.py
Normal file
66
strings/palindrome.py
Normal file
@ -0,0 +1,66 @@
|
||||
# Algorithms to determine whether a string is a palindrome
|
||||
|
||||
# Sample strings mapped to whether each one is a palindrome.
test_data = {
    "MALAYALAM": True,
    "String": False,
    "rotor": True,
    "level": True,
    "A": True,
    "BB": True,
    "ABC": False,
    "amanaplanacanalpanama": True,  # "a man a plan a canal panama"
}
# Ensure our test data is valid
assert all((key == key[::-1]) is value for key, value in test_data.items())
|
||||
|
||||
|
||||
def is_palindrome(s: str) -> bool:
    """
    Return True if s is a palindrome otherwise return False.

    Characters are compared pairwise from both ends towards the middle, and
    the first mismatch ends the check.

    >>> all(is_palindrome(key) is value for key, value in test_data.items())
    True
    """
    for offset in range(len(s) // 2):
        if s[offset] != s[-1 - offset]:
            return False
    return True
|
||||
|
||||
|
||||
def is_palindrome_recursive(s: str) -> bool:
    """
    Return True if s is a palindrome otherwise return False.

    Compares the outermost pair of characters, then recurses on the string
    with both of them removed.

    >>> all(is_palindrome_recursive(key) is value for key, value in test_data.items())
    True
    """
    # Zero or one character reads the same in both directions.
    if len(s) <= 1:
        return True
    if s[0] != s[-1]:
        return False
    return is_palindrome_recursive(s[1:-1])
|
||||
|
||||
|
||||
def is_palindrome_slice(s: str) -> bool:
    """
    Return True if s is a palindrome otherwise return False.

    Compares s with a reversed copy of itself.

    >>> all(is_palindrome_slice(key) is value for key, value in test_data.items())
    True
    """
    mirrored = s[::-1]
    return mirrored == s
|
||||
|
||||
|
||||
# When run as a script, check that the three implementations agree on every
# sample and print each sample with its expected result.
if __name__ == "__main__":
    for key, value in test_data.items():
        expected = is_palindrome(key)
        assert expected is is_palindrome_recursive(key)
        assert is_palindrome(key) is is_palindrome_slice(key)
        print(f"{key:21} {value}")
    print("a man a plan a canal panama")
|
44
strings/word_patterns.py
Normal file
44
strings/word_patterns.py
Normal file
@ -0,0 +1,44 @@
|
||||
def get_word_pattern(word: str) -> str:
    """
    Return the character pattern of ``word`` as dot-separated numbers.

    The word is upper-cased, then each distinct character (spaces included)
    is numbered in order of first appearance, starting at 0.

    >>> get_word_pattern("pattern")
    '0.1.2.2.3.4.5'
    >>> get_word_pattern("word pattern")
    '0.1.2.3.4.5.6.7.7.8.2.9'
    >>> get_word_pattern("get word pattern")
    '0.1.2.3.4.5.6.7.3.8.9.2.2.1.6.10'
    """
    codes = {}
    pattern = []
    for letter in word.upper():
        # Numbers are handed out as 0, 1, 2, ..., so len(codes) is always the
        # next unused one.
        code = codes.setdefault(letter, str(len(codes)))
        pattern.append(code)
    return ".".join(pattern)
|
||||
|
||||
|
||||
# Script mode: group the words of dictionary.txt by pattern and write the
# mapping to word_patterns.txt.
if __name__ == "__main__":
    import pprint
    import time

    start_time = time.time()
    with open("dictionary.txt") as in_file:
        wordList = in_file.read().splitlines()

    # pattern -> list of words sharing it, in file order.
    all_patterns = {}
    for word in wordList:
        pattern = get_word_pattern(word)
        all_patterns.setdefault(pattern, []).append(word)

    with open("word_patterns.txt", "w") as out_file:
        out_file.write(pprint.pformat(all_patterns))

    totalTime = round(time.time() - start_time, 2)
    print(f"Done! {len(all_patterns):,} word patterns found in {totalTime} seconds.")
    # Done! 9,581 word patterns found in 0.58 seconds.
|
Reference in New Issue
Block a user