mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-07-05 09:21:13 +08:00
Only one carriage return (#2155)
* updating DIRECTORY.md * touch * fixup! Format Python code with psf/black push * Update word_frequency_functions.py * updating DIRECTORY.md * Update word_frequency_functions.py * Update lfu_cache.py * Update sol1.py Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
This commit is contained in:
@ -40,7 +40,7 @@ from math import log10
|
||||
"""
|
||||
|
||||
|
||||
def term_frequency(term : str, document : str) -> int:
|
||||
def term_frequency(term: str, document: str) -> int:
|
||||
"""
|
||||
Return the number of times a term occurs within
|
||||
a given document.
|
||||
@ -58,9 +58,7 @@ def term_frequency(term : str, document : str) -> int:
|
||||
str.maketrans("", "", string.punctuation)
|
||||
).replace("\n", "")
|
||||
tokenize_document = document_without_punctuation.split(" ") # word tokenization
|
||||
return len(
|
||||
[word for word in tokenize_document if word.lower() == term.lower()]
|
||||
)
|
||||
return len([word for word in tokenize_document if word.lower() == term.lower()])
|
||||
|
||||
|
||||
def document_frequency(term: str, corpus: str) -> int:
|
||||
@ -77,17 +75,18 @@ is the second document in the corpus.\\nTHIS is \
|
||||
the third document in the corpus.")
|
||||
(1, 3)
|
||||
"""
|
||||
corpus_without_punctuation = corpus.translate(
|
||||
corpus_without_punctuation = corpus.lower().translate(
|
||||
str.maketrans("", "", string.punctuation)
|
||||
) # strip all punctuation and replace it with ''
|
||||
documents = corpus_without_punctuation.split("\n")
|
||||
lowercase_documents = [document.lower() for document in documents]
|
||||
return len(
|
||||
[document for document in lowercase_documents if term.lower() in document]
|
||||
), len(documents)
|
||||
docs = corpus_without_punctuation.split("\n")
|
||||
term = term.lower()
|
||||
return (
|
||||
len([doc for doc in docs if term in doc]),
|
||||
len(docs),
|
||||
)
|
||||
|
||||
|
||||
def inverse_document_frequency(df : int, N: int) -> float:
|
||||
def inverse_document_frequency(df: int, N: int) -> float:
|
||||
"""
|
||||
Return a float denoting the importance
|
||||
of a word. This measure of importance is
|
||||
@ -116,7 +115,7 @@ def inverse_document_frequency(df : int, N: int) -> float:
|
||||
return round(log10(N / df), 3)
|
||||
|
||||
|
||||
def tf_idf(tf : int, idf: int) -> float:
|
||||
def tf_idf(tf: int, idf: int) -> float:
|
||||
"""
|
||||
Combine the term frequency
|
||||
and inverse document frequency functions to
|
||||
|
Reference in New Issue
Block a user