Refactor the hash_map_open_addressing implementation with lazy reallocation. (#776)

2025-12-19 07:17:54 +08:00 · 2023-09-21 04:43:15 -05:00
parent 45e20e57a1
commit a46b482951
4 changed files with 190 additions and 145 deletions
--- a/codes/python/chapter_hashing/hash_map_open_addressing.py
+++ b/codes/python/chapter_hashing/hash_map_open_addressing.py
@@ -20,7 +20,7 @@ class HashMapOpenAddressing:
        self.load_thres = 2 / 3  # 触发扩容的负载因子阈值
        self.extend_ratio = 2  # 扩容倍数
        self.buckets: list[Pair | None] = [None] * self.capacity  # 桶数组
-        self.removed = Pair(-1, "-1")  # 删除标记
+        self.TOMBSTONE = Pair(-1, "-1")  # 删除标记

    def hash_func(self, key: int) -> int:
        """哈希函数"""
@@ -30,55 +30,61 @@ class HashMapOpenAddressing:
        """负载因子"""
        return self.size / self.capacity

+    def find_bucket(self, key: int) -> int:
+        """Return the index of the bucket holding `key`, or of the bucket where it should be inserted.
+
+        Uses linear probing starting at `hash_func(key)`. When `key` is found
+        after one or more tombstones, the pair is moved into the first
+        tombstone seen (the old slot becomes a tombstone) and that earlier
+        index is returned. When `key` is absent, the first tombstone seen is
+        returned if there was one, otherwise the empty bucket that ended the probe.
+        """
+        index = self.hash_func(key)
+        first_tombstone = -1
+        # Probe at most `capacity` buckets: tombstones are not counted in `size`,
+        # so the table can end up with no empty bucket at all, and an unbounded
+        # "until None" loop would then never terminate.
+        for _ in range(self.capacity):
+            bucket = self.buckets[index]
+            # An empty bucket ends the probe sequence: `key` is not stored
+            if bucket is None:
+                return index if first_tombstone == -1 else first_tombstone
+            if bucket is self.TOMBSTONE:
+                # Check identity before comparing keys so that a real key equal
+                # to the tombstone's placeholder key (-1) is never confused with it;
+                # only the first tombstone is remembered as the reuse slot
+                if first_tombstone == -1:
+                    first_tombstone = index
+            elif bucket.key == key:
+                # Move the pair forward into the first tombstone to shorten later probes
+                if first_tombstone != -1:
+                    self.buckets[first_tombstone] = bucket
+                    self.buckets[index] = self.TOMBSTONE
+                    return first_tombstone  # index after the move
+                return index
+            # Advance to the next bucket, wrapping from the tail back to the head
+            index = (index + 1) % self.capacity
+        # Every bucket was visited without meeting an empty one: `key` is absent
+        if first_tombstone != -1:
+            return first_tombstone
+        # All buckets hold live pairs with other keys, so there is no valid
+        # insertion point. Grow the table and search again.
+        # NOTE(review): assumes extend() rehashes into a larger bucket array
+        # with free slots; its full body is not visible here, please confirm.
+        self.extend()
+        return self.find_bucket(key)
+
    def get(self, key: int) -> str:
        """Look up key and return its val, or None when key is not present."""
-        index = self.hash_func(key)
-        # Linear probing: scan forward starting at index
-        for i in range(self.capacity):
-            # Compute the bucket index, wrapping from the tail back to the head
-            j = (index + i) % self.capacity
-            # An empty bucket means the key is absent: return None
-            if self.buckets[j] is None:
-                return None
-            # The key was found: return its val
-            if self.buckets[j].key == key and self.buckets[j] != self.removed:
-                return self.buckets[j].val
+        # Locate the bucket index for key
+        index = self.find_bucket(key)
+        # The pair exists: return its val
+        if self.buckets[index] not in [None, self.TOMBSTONE]:
+            return self.buckets[index].val
+        # The pair does not exist: return None
+        return None

    def put(self, key: int, val: str):
        """Insert the pair (key, val), or overwrite val when key is already present."""
        # Grow the table once the load factor exceeds the threshold
        if self.load_factor() > self.load_thres:
            self.extend()
-        index = self.hash_func(key)
-        # Linear probing: scan forward starting at index
-        for i in range(self.capacity):
-            # Compute the bucket index, wrapping from the tail back to the head
-            j = (index + i) % self.capacity
-            # An empty bucket or one holding the removal marker receives the new pair
-            if self.buckets[j] in [None, self.removed]:
-                self.buckets[j] = Pair(key, val)
-                self.size += 1
-                return
-            # The key already exists: update its val
-            if self.buckets[j].key == key:
-                self.buckets[j].val = val
-                return
+        # Locate the bucket index for key
+        index = self.find_bucket(key)
+        # The pair exists: overwrite val and return
+        if self.buckets[index] not in [None, self.TOMBSTONE]:
+            self.buckets[index].val = val
+            return
+        # The pair does not exist: add it
+        self.buckets[index] = Pair(key, val)
+        self.size += 1

    def remove(self, key: int):
        """Remove the pair stored under key; do nothing when key is not present."""
-        index = self.hash_func(key)
-        # Linear probing: scan forward starting at index
-        for i in range(self.capacity):
-            # Compute the bucket index, wrapping from the tail back to the head
-            j = (index + i) % self.capacity
-            # An empty bucket means the key is absent: return immediately
-            if self.buckets[j] is None:
-                return
-            # The key was found: mark the bucket as removed and return
-            if self.buckets[j].key == key:
-                self.buckets[j] = self.removed
-                self.size -= 1
-                return
+        # Locate the bucket index for key
+        index = self.find_bucket(key)
+        # The pair exists: overwrite it with the removal marker
+        if self.buckets[index] not in [None, self.TOMBSTONE]:
+            self.buckets[index] = self.TOMBSTONE
+            self.size -= 1

    def extend(self):
        """扩容哈希表"""
@@ -90,21 +96,23 @@ class HashMapOpenAddressing:
        self.size = 0
        # 将键值对从原哈希表搬运至新哈希表
        for pair in buckets_tmp:
-            if pair not in [None, self.removed]:
+            if pair not in [None, self.TOMBSTONE]:
                self.put(pair.key, pair.val)

    def print(self):
        """Print every bucket of the hash table, one per line."""
        for pair in self.buckets:
-            if pair is not None:
-                print(pair.key, "->", pair.val)
-            else:
+            if pair is None:
                print("None")
+            elif pair is self.TOMBSTONE:
+                print("TOMBSTONE")
+            else:
+                print(pair.key, "->", pair.val)


 """Driver Code"""
 if __name__ == "__main__":
-    # 测试代码
+    # 初始化哈希表
    hashmap = HashMapOpenAddressing()

    # 添加操作