Only one carriage return (#2155)

* updating DIRECTORY.md

* touch

* fixup! Format Python code with psf/black push

* Update word_frequency_functions.py

* updating DIRECTORY.md

* Update word_frequency_functions.py

* Update lfu_cache.py

* Update sol1.py

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
Christian Clauss
2020-06-25 19:15:30 +02:00
committed by GitHub
parent d2fa91b18e
commit 8ab84fd794
7 changed files with 77 additions and 68 deletions

View File

@ -40,7 +40,7 @@ from math import log10
"""
def term_frequency(term : str, document : str) -> int:
def term_frequency(term: str, document: str) -> int:
"""
Return the number of times a term occurs within
a given document.
@ -58,9 +58,7 @@ def term_frequency(term : str, document : str) -> int:
str.maketrans("", "", string.punctuation)
).replace("\n", "")
tokenize_document = document_without_punctuation.split(" ") # word tokenization
return len(
[word for word in tokenize_document if word.lower() == term.lower()]
)
return len([word for word in tokenize_document if word.lower() == term.lower()])
def document_frequency(term: str, corpus: str) -> int:
@ -77,17 +75,18 @@ is the second document in the corpus.\\nTHIS is \
the third document in the corpus.")
(1, 3)
"""
corpus_without_punctuation = corpus.translate(
corpus_without_punctuation = corpus.lower().translate(
str.maketrans("", "", string.punctuation)
) # strip all punctuation and replace it with ''
documents = corpus_without_punctuation.split("\n")
lowercase_documents = [document.lower() for document in documents]
return len(
[document for document in lowercase_documents if term.lower() in document]
), len(documents)
docs = corpus_without_punctuation.split("\n")
term = term.lower()
return (
len([doc for doc in docs if term in doc]),
len(docs),
)
def inverse_document_frequency(df : int, N: int) -> float:
def inverse_document_frequency(df: int, N: int) -> float:
"""
Return a float denoting the importance
of a word. This measure of importance is
@ -116,7 +115,7 @@ def inverse_document_frequency(df : int, N: int) -> float:
return round(log10(N / df), 3)
def tf_idf(tf : int, idf: int) -> float:
def tf_idf(tf: int, idf: int) -> float:
"""
Combine the term frequency
and inverse document frequency functions to