Move files to strings folder (#4283)

* Move files to strings-folder

* moved the file "words" back to the original folder

* moved "anagram.py" also back

* fix the codespell ignore-list
This commit is contained in:
algobytewise
2021-03-21 16:35:10 +05:30
committed by GitHub
parent 2c6f553ccb
commit 99a42f2b58
7 changed files with 2 additions and 2 deletions

View File

@ -0,0 +1,64 @@
# Sentinel key stored in a trie node to mark that a complete word ends there.
END = "#"


class Trie:
    """Prefix tree stored as nested dicts keyed by single characters.

    Each node is a dict mapping a character to its child node. A node that
    terminates an inserted word additionally holds the ``END`` key.

    Note: words containing the ``END`` character itself ("#") are not
    supported, because that key is reserved as the end-of-word marker.
    """

    def __init__(self):
        # Root node; empty until the first word is inserted.
        self._trie = {}

    def insert_word(self, text):
        """Insert ``text`` into the trie, creating nodes as needed."""
        node = self._trie
        for char in text:
            node = node.setdefault(char, {})
        node[END] = True

    def find_word(self, prefix):
        """Return a tuple of the suffixes that complete ``prefix``.

        Every suffix ends with a single space (the rendering of the
        end-of-word marker). An empty tuple is returned when no inserted
        word starts with ``prefix``, so the return type is always a tuple.
        """
        node = self._trie
        for char in prefix:
            if char not in node:
                return ()
            node = node[char]
        return self._elements(node)

    def _elements(self, d):
        """Recursively collect every suffix reachable from node ``d``."""
        result = []
        for c, v in d.items():
            if c == END:
                # A word ends here; it is emitted as a trailing space.
                sub_result = [" "]
            else:
                sub_result = [c + s for s in self._elements(v)]
            result.extend(sub_result)
        return tuple(result)
# Demo vocabulary; it is loaded into the module-level trie at import time.
words = ("depart", "detergent", "daring", "dog", "deer", "deal")

# Shared trie queried by autocomplete_using_trie().
trie = Trie()
for word in words:
    trie.insert_word(word)
def autocomplete_using_trie(s):
    """
    Return a tuple of every word in the module-level ``trie`` that starts
    with the prefix ``s``. Each returned word carries a trailing space,
    which is how the trie renders its end-of-word marker.

    >>> trie = Trie()
    >>> for word in words:
    ...     trie.insert_word(word)
    ...
    >>> matches = autocomplete_using_trie("de")
    >>> "detergent " in matches
    True
    >>> "dog " in matches
    False
    """
    suffixes = trie.find_word(s)
    # Re-attach the prefix so callers receive whole words, not just suffixes.
    return tuple(s + w for w in suffixes)
def main():
    """Print the demo completions for the prefix "de"."""
    completions = autocomplete_using_trie("de")
    print(completions)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,61 @@
import os
# The 26 uppercase ASCII letters.
UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Every character removeNonLetters() keeps: A-Z, a-z, space, tab and newline.
LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
def loadDictionary():
    """Read ``dictionary.txt`` from this module's directory into a dict.

    The file is split on newlines and every resulting entry becomes a key
    mapped to ``None``; the dict is used purely for fast membership tests.
    """
    moduleDir = os.path.dirname(os.path.realpath(__file__))
    with open(moduleDir + "/dictionary.txt") as dictionaryFile:
        contents = dictionaryFile.read()
    return dict.fromkeys(contents.split("\n"))
# Word table built once at import time; getEnglishCount() checks membership in it.
ENGLISH_WORDS = loadDictionary()
def getEnglishCount(message):
    """Return the fraction (0.0 to 1.0) of words in ``message`` found in ENGLISH_WORDS.

    The message is upper-cased and stripped of non-letter characters before
    being split on whitespace. A message with no words yields 0.0.
    """
    candidates = removeNonLetters(message.upper()).split()
    if not candidates:
        return 0.0
    hits = sum(1 for candidate in candidates if candidate in ENGLISH_WORDS)
    return float(hits) / len(candidates)
def removeNonLetters(message):
    """Return ``message`` with every character not in LETTERS_AND_SPACE dropped."""
    return "".join(symbol for symbol in message if symbol in LETTERS_AND_SPACE)
def isEnglish(message, wordPercentage=20, letterPercentage=85):
    """
    Decide whether ``message`` looks like English text.

    ``wordPercentage`` is the minimum percentage of the message's words that
    must appear in the dictionary; ``letterPercentage`` is the minimum
    percentage of its characters that must be letters or whitespace.
    Both thresholds have to be met. An empty message is never English.

    >>> isEnglish('Hello World')
    True
    >>> isEnglish('llold HorWd')
    False
    >>> isEnglish('')
    False
    """
    # Guard against dividing by len(message) == 0 further down.
    if not message:
        return False
    wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
    numLetters = len(removeNonLetters(message))
    messageLettersPercentage = (float(numLetters) / len(message)) * 100
    lettersMatch = messageLettersPercentage >= letterPercentage
    return wordsMatch and lettersMatch
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in the docstrings.
    import doctest

    doctest.testmod()

45334
strings/dictionary.txt Normal file

File diff suppressed because it is too large Load Diff

120
strings/frequency_finder.py Normal file
View File

@ -0,0 +1,120 @@
# Frequency Finder
# Frequencies taken from http://en.wikipedia.org/wiki/Letter_frequency
# Maps each uppercase letter to its relative frequency in English text
# (percent), listed from most to least frequent.
englishLetterFreq = {
    "E": 12.70,
    "T": 9.06,
    "A": 8.17,
    "O": 7.51,
    "I": 6.97,
    "N": 6.75,
    "S": 6.33,
    "H": 6.09,
    "R": 5.99,
    "D": 4.25,
    "L": 4.03,
    "C": 2.78,
    "U": 2.76,
    "M": 2.41,
    "W": 2.36,
    "F": 2.23,
    "G": 2.02,
    "Y": 1.97,
    "P": 1.93,
    "B": 1.29,
    "V": 0.98,
    "K": 0.77,
    "J": 0.15,
    "X": 0.15,
    "Q": 0.10,
    "Z": 0.07,
}
# The alphabet in the same most-to-least-frequent order as the table above.
ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
# The alphabet in plain A-Z order.
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def getLetterCount(message):
    """Return a dict mapping each letter A-Z to its number of occurrences in ``message``.

    Counting is case-insensitive; characters outside LETTERS are ignored and
    letters that never occur are still present with a count of 0.
    """
    letterCount = dict.fromkeys(LETTERS, 0)
    for symbol in message.upper():
        if symbol in LETTERS:
            letterCount[symbol] += 1
    return letterCount
def getItemAtIndexZero(x):
    """Return the element at index 0 of ``x`` (used as a sort key)."""
    head = x[0]
    return head
def getFrequencyOrder(message):
    """Return the 26 letters ordered from most to least frequent in ``message``.

    Letters sharing the same count are grouped together and, within a group,
    ordered by descending position in ETAOIN (so rarer English letters come
    first within a tie).
    """
    counts = getLetterCount(message)

    # Bucket the letters by how often each one occurs.
    buckets = {}
    for letter in LETTERS:
        buckets.setdefault(counts[letter], []).append(letter)

    # Collapse each bucket into a string, tie-broken by reverse ETAOIN order.
    joined = {
        freq: "".join(sorted(letters, key=ETAOIN.find, reverse=True))
        for freq, letters in buckets.items()
    }

    # Highest count first.
    ranked = sorted(joined.items(), key=getItemAtIndexZero, reverse=True)
    return "".join(group for _, group in ranked)
def englishFreqMatchScore(message):
    """
    Score how closely the letter frequencies of ``message`` resemble English.

    One point is given for each of the six most common English letters found
    among the message's six most frequent letters, and one for each of the
    six least common English letters found among its six least frequent
    ones, so the score ranges from 0 to 12.

    >>> englishFreqMatchScore('Hello World')
    1
    """
    order = getFrequencyOrder(message)
    mostFrequent, leastFrequent = order[:6], order[-6:]
    commonHits = sum(1 for letter in ETAOIN[:6] if letter in mostFrequent)
    uncommonHits = sum(1 for letter in ETAOIN[-6:] if letter in leastFrequent)
    return commonHits + uncommonHits
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in the docstrings.
    import doctest

    doctest.testmod()

66
strings/palindrome.py Normal file
View File

@ -0,0 +1,66 @@
# Algorithms to determine if a string is a palindrome

# Maps each sample string to whether it equals its own reverse.
test_data = {
    "MALAYALAM": True,
    "String": False,
    "rotor": True,
    "level": True,
    "A": True,
    "BB": True,
    "ABC": False,
    "amanaplanacanalpanama": True,  # "a man a plan a canal panama"
}
# Ensure our test data is valid: each expected value must agree with a
# direct reversed-string comparison.
assert all((key == key[::-1]) is value for key, value in test_data.items())
def is_palindrome(s: str) -> bool:
    """
    Return True if s is a palindrome otherwise return False.

    Two indices walk inwards from both ends and stop at the first mismatch.

    >>> all(is_palindrome(key) is value for key, value in test_data.items())
    True
    """
    left, right = 0, len(s) - 1
    while left < right:
        if s[left] != s[right]:
            return False
        left += 1
        right -= 1
    return True
def is_palindrome_recursive(s: str) -> bool:
    """
    Return True if s is a palindrome otherwise return False.

    The outer characters are compared, then the check recurses on the
    string with both ends removed.

    >>> all(is_palindrome_recursive(key) is value for key, value in test_data.items())
    True
    """
    if len(s) <= 1:
        return True
    return s[0] == s[-1] and is_palindrome_recursive(s[1:-1])
def is_palindrome_slice(s: str) -> bool:
    """
    Return True if s is a palindrome otherwise return False.

    Compares the string against its reversed copy.

    >>> all(is_palindrome_slice(key) is value for key, value in test_data.items())
    True
    """
    mirrored = s[::-1]
    return mirrored == s
if __name__ == "__main__":
    # Cross-check the three implementations against each other, then print
    # each sample alongside its expected result.
    for key, value in test_data.items():
        verdict = is_palindrome(key)
        assert verdict is is_palindrome_recursive(key)
        assert verdict is is_palindrome_slice(key)
        print(f"{key:21} {value}")
    print("a man a plan a canal panama")

44
strings/word_patterns.py Normal file
View File

@ -0,0 +1,44 @@
def get_word_pattern(word: str) -> str:
    """
    Return the letter pattern of ``word`` as dot-separated numbers.

    Each distinct character (compared case-insensitively) is numbered in
    order of first appearance, starting at 0; repeated characters reuse
    their number.

    >>> get_word_pattern("pattern")
    '0.1.2.2.3.4.5'
    >>> get_word_pattern("word pattern")
    '0.1.2.3.4.5.6.7.7.8.2.9'
    >>> get_word_pattern("get word pattern")
    '0.1.2.3.4.5.6.7.3.8.9.2.2.1.6.10'
    """
    codes = {}
    # len(codes) is evaluated before insertion, so it is the next unused number.
    return ".".join(
        codes.setdefault(symbol, str(len(codes))) for symbol in word.upper()
    )
if __name__ == "__main__":
    import pprint
    import time

    start_time = time.time()

    # One word per line, read from the current working directory.
    with open("dictionary.txt") as in_file:
        wordList = in_file.read().splitlines()

    # Group every dictionary word under its letter pattern.
    all_patterns = {}
    for word in wordList:
        all_patterns.setdefault(get_word_pattern(word), []).append(word)

    with open("word_patterns.txt", "w") as out_file:
        out_file.write(pprint.pformat(all_patterns))

    totalTime = round(time.time() - start_time, 2)
    print(f"Done! {len(all_patterns):,} word patterns found in {totalTime} seconds.")
    # Done! 9,581 word patterns found in 0.58 seconds.