From a3af358c3f287af4410831df9ac1c652685b4122 Mon Sep 17 00:00:00 2001
From: louietyj <louietyj@gmail.com>
Date: Fri, 20 Oct 2017 17:28:17 +0800
Subject: [PATCH] Add Rabin-Karp rolling hash (#64)

* Rabin-Karp hash implementation

* Test cases for Rabin-Karp hash
---
 utilities/python/rabin_karp_hash.py | 41 +++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 utilities/python/rabin_karp_hash.py

diff --git a/utilities/python/rabin_karp_hash.py b/utilities/python/rabin_karp_hash.py
new file mode 100644
index 00000000..9f0fd105
--- /dev/null
+++ b/utilities/python/rabin_karp_hash.py
@@ -0,0 +1,41 @@
+## Rabin-Karp Rolling Hash
+## Implementation of: https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm#Hash_function_used
+##
+## This rolling hash function is useful when you need to compute the hash of successive substrings
+## of text. E.g. note that going from 'abcd' to 'bcde', we drop the 'a' from the back and add an 'e'
+## on the right. The rolling hash function thus allows us to update the hash in-place O(1) instead of
+## recomputing the full hash of the substring O(m), where m is the length of the substring.
+##
+## NOTE: The implementation below takes in a tuple of integers, to be as general as possible. For use
+## with strings, simply take the ASCII value of each character before passing into the functions.
+
+BASE = 101  # Arbitrary prime number
+
+def rk_hash_init(tpl):
+    '''Initializes the hash with a tuple of integers.'''
+    return sum(n * BASE ** i for i, n in enumerate(reversed(tpl)))
+
+def rk_hash_update(curr_hash, size, add_n, rem_n):
+    '''Updates the hash by removing an integer from the left and appending
+    an integer to the right.
+
+    curr_hash: The previous hash
+    size: The size of the rolling window
+    add_n: The integer appended to the right
+    rem_n: The integer removed from the left'''
+    return (curr_hash - (rem_n * BASE ** (size - 1))) * BASE + add_n
+
+
+
+abc_hash = rk_hash_init(tuple(map(ord, 'abc')))     # Init the hash with 'abc'
+print('abc:', abc_hash)
+bcd_hash_1 = rk_hash_update(abc_hash, 3, ord('d'), ord('a'))    # Add a 'd' to the right, remove an 'a' from the left
+print('bcd 1:', bcd_hash_1)
+
+zbc_hash = rk_hash_init(tuple(map(ord, 'zbc')))     # Init the hash with 'zbc'
+print('zbc:', zbc_hash)
+bcd_hash_2 = rk_hash_update(zbc_hash, 3, ord('d'), ord('z'))    # Add a 'd' to the right, remove a 'z' from the left
+print('bcd 2:', bcd_hash_2)
+
+# Notice that both hash values are the same despite arriving via different paths
+print(bcd_hash_1 == bcd_hash_2)