mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-07-05 17:34:49 +08:00
feat: add strings/ngram algorithm (#6074)
* feat: added ngram algorithm * fix(test): use `math.isclose` to match floating point numbers approximately Co-authored-by: Christian Clauss <cclauss@me.com> Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com>
This commit is contained in:
23
strings/ngram.py
Normal file
23
strings/ngram.py
Normal file
@ -0,0 +1,23 @@
|
||||
"""
|
||||
https://en.wikipedia.org/wiki/N-gram
|
||||
"""
|
||||
|
||||
|
||||
def create_ngram(sentence: str, ngram_size: int) -> list[str]:
    """
    Create character-level ngrams from a sentence.

    A window of ``ngram_size`` characters is slid over ``sentence`` one position
    at a time and every window is collected in left-to-right order.  When the
    sentence is shorter than ``ngram_size`` there is no full window, so the
    result is an empty list.

    :param sentence: text to split into overlapping character windows
    :param ngram_size: number of characters in each ngram; must be at least 1
    :return: list of all ``ngram_size``-character substrings of ``sentence``
    :raises ValueError: if ``ngram_size`` is less than 1

    >>> create_ngram("I am a sentence", 2)
    ['I ', ' a', 'am', 'm ', ' a', 'a ', ' s', 'se', 'en', 'nt', 'te', 'en', 'nc', 'ce']
    >>> create_ngram("I am an NLPer", 2)
    ['I ', ' a', 'am', 'm ', ' a', 'an', 'n ', ' N', 'NL', 'LP', 'Pe', 'er']
    >>> create_ngram("This is short", 50)
    []
    >>> create_ngram("abc", 3)
    ['abc']
    >>> create_ngram("abc", 0)
    Traceback (most recent call last):
        ...
    ValueError: ngram_size must be a positive integer, got 0
    """
    if ngram_size < 1:
        # A non-positive size would make the slices below empty or wrap around
        # through negative indexing, silently returning meaningless fragments.
        raise ValueError(f"ngram_size must be a positive integer, got {ngram_size}")
    # The last valid window starts at len(sentence) - ngram_size; range() is
    # empty when that value is negative, which covers too-short sentences.
    return [sentence[i : i + ngram_size] for i in range(len(sentence) - ngram_size + 1)]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When executed as a script, run the doctest examples in this module.
    import doctest

    doctest.testmod()
|
Reference in New Issue
Block a user