build

2025-07-29 13:23:09 +08:00 · 2024-04-09 20:43:40 +08:00
parent d8caf02e9e
commit a6adc8e20a
48 changed files with 1599 additions and 571 deletions
--- a/en/docs/chapter_hashing/hash_collision.md
+++ b/en/docs/chapter_hashing/hash_collision.md
@ -1114,17 +1114,14 @@ The code below provides a simple implementation of a separate chaining hash tabl
            // 遍历桶，若遇到指定 key ，则更新对应 val 并返回
            for pair in bucket {
                if pair.key == key {
-                    pair.val = val.clone();
+                    pair.val = val;
                    return;
                }
            }
            let bucket = &mut self.buckets[index];

            // 若无该 key ，则将键值对添加至尾部
-            let pair = Pair {
-                key,
-                val: val.clone(),
-            };
+            let pair = Pair { key, val };
            bucket.push(pair);
            self.size += 1;
        }
@ -1328,7 +1325,7 @@ The code below provides a simple implementation of a separate chaining hash tabl
            capacity = 4
            loadThres = 2.0 / 3.0
            extendRatio = 2
-            buckets = ArrayList(capacity)
+            buckets = mutableListOf()
            for (i in 0..<capacity) {
                buckets.add(mutableListOf())
            }
@ -2960,16 +2957,21 @@ The code below implements an open addressing (linear probing) hash table with la
    ```kotlin title="hash_map_open_addressing.kt"
    /* 开放寻址哈希表 */
    class HashMapOpenAddressing {
-        private var size: Int = 0 // 键值对数量
-        private var capacity = 4 // 哈希表容量
-        private val loadThres: Double = 2.0 / 3.0 // 触发扩容的负载因子阈值
-        private val extendRatio = 2 // 扩容倍数
-        private var buckets: Array<Pair?> // 桶数组
-        private val TOMBSTONE = Pair(-1, "-1") // 删除标记
+        private var size: Int               // 键值对数量
+        private var capacity: Int           // 哈希表容量
+        private val loadThres: Double       // 触发扩容的负载因子阈值
+        private val extendRatio: Int        // 扩容倍数
+        private var buckets: Array<Pair?>   // 桶数组
+        private val TOMBSTONE: Pair         // 删除标记

        /* 构造方法 */
        init {
+            size = 0
+            capacity = 4
+            loadThres = 2.0 / 3.0
+            extendRatio = 2
            buckets = arrayOfNulls(capacity)
+            TOMBSTONE = Pair(-1, "-1")
        }

        /* 哈希函数 */
--- a/en/docs/chapter_hashing/hash_map.md
+++ b/en/docs/chapter_hashing/hash_map.md
@ -1548,15 +1548,9 @@ The following code implements a simple hash table. Here, we encapsulate `key` an

    /* 基于数组实现的哈希表 */
    class ArrayHashMap {
+        // 初始化数组，包含 100 个桶
        private val buckets = arrayOfNulls<Pair>(100)

-        init {
-            // 初始化数组，包含 100 个桶
-            for (i in 0..<100) {
-                buckets[i] = null
-            }
-        }
-
        /* 哈希函数 */
        fun hashFunc(key: Int): Int {
            val index = key % 100
@ -1586,25 +1580,27 @@ The following code implements a simple hash table. Here, we encapsulate `key` an

        /* 获取所有键值对 */
        fun pairSet(): MutableList<Pair> {
-            val pairSet = ArrayList<Pair>()
+            val pairSet = mutableListOf<Pair>()
            for (pair in buckets) {
-                if (pair != null) pairSet.add(pair)
+                if (pair != null)
+                    pairSet.add(pair)
            }
            return pairSet
        }

        /* 获取所有键 */
        fun keySet(): MutableList<Int> {
-            val keySet = ArrayList<Int>()
+            val keySet = mutableListOf<Int>()
            for (pair in buckets) {
-                if (pair != null) keySet.add(pair.key)
+                if (pair != null)
+                    keySet.add(pair.key)
            }
            return keySet
        }

        /* 获取所有值 */
        fun valueSet(): MutableList<String> {
-            val valueSet = ArrayList<String>()
+            val valueSet = mutableListOf<String>()
            for (pair in buckets) {
                pair?.let { valueSet.add(it.value) }
            }
@ -1616,22 +1612,16 @@ The following code implements a simple hash table. Here, we encapsulate `key` an
            for (kv in pairSet()) {
                val key = kv.key
                val value = kv.value
-                println("${key}->${value}")
+                println("${key} -> ${value}")
            }
        }
    }

    /* 基于数组实现的哈希表 */
    class ArrayHashMap {
+        // 初始化数组，包含 100 个桶
        private val buckets = arrayOfNulls<Pair>(100)

-        init {
-            // 初始化数组，包含 100 个桶
-            for (i in 0..<100) {
-                buckets[i] = null
-            }
-        }
-
        /* 哈希函数 */
        fun hashFunc(key: Int): Int {
            val index = key % 100
@ -1661,25 +1651,27 @@ The following code implements a simple hash table. Here, we encapsulate `key` an

        /* 获取所有键值对 */
        fun pairSet(): MutableList<Pair> {
-            val pairSet = ArrayList<Pair>()
+            val pairSet = mutableListOf<Pair>()
            for (pair in buckets) {
-                if (pair != null) pairSet.add(pair)
+                if (pair != null)
+                    pairSet.add(pair)
            }
            return pairSet
        }

        /* 获取所有键 */
        fun keySet(): MutableList<Int> {
-            val keySet = ArrayList<Int>()
+            val keySet = mutableListOf<Int>()
            for (pair in buckets) {
-                if (pair != null) keySet.add(pair.key)
+                if (pair != null)
+                    keySet.add(pair.key)
            }
            return keySet
        }

        /* 获取所有值 */
        fun valueSet(): MutableList<String> {
-            val valueSet = ArrayList<String>()
+            val valueSet = mutableListOf<String>()
            for (pair in buckets) {
                pair?.let { valueSet.add(it.value) }
            }
@ -1691,7 +1683,7 @@ The following code implements a simple hash table. Here, we encapsulate `key` an
            for (kv in pairSet()) {
                val key = kv.key
                val value = kv.value
-                println("${key}->${value}")
+                println("${key} -> ${value}")
            }
        }
    }
--- a/en/docs/chapter_hashing/summary.md
+++ b/en/docs/chapter_hashing/summary.md
@ -8,12 +8,12 @@ comments: true

 - Given an input `key`, a hash table can retrieve the corresponding `value` in $O(1)$ time, which is highly efficient.
 - Common hash table operations include querying, adding key-value pairs, deleting key-value pairs, and traversing the hash table.
- The hash function maps a `key` to an array index, allowing access to the corresponding bucket to retrieve the `value`.
+- The hash function maps a `key` to an array index, allowing access to the corresponding bucket and retrieval of the `value`.
 - Two different keys may end up with the same array index after hashing, leading to erroneous query results. This phenomenon is known as hash collision.
 - The larger the capacity of the hash table, the lower the probability of hash collisions. Therefore, hash table resizing can mitigate hash collisions. Similar to array resizing, hash table resizing is costly.
- Load factor, defined as the ratio of the number of elements to the number of buckets in the hash table, reflects the severity of hash collisions and is often used as a trigger for resizing the hash table.
+- The load factor, defined as the number of elements divided by the number of buckets, reflects the severity of hash collisions and is often used as a condition to trigger hash table resizing.
 - Chaining addresses hash collisions by converting each element into a linked list, storing all colliding elements in the same list. However, excessively long lists can reduce query efficiency, which can be improved by converting the lists into red-black trees.
- Open addressing handles hash collisions through multiple probes. Linear probing uses a fixed step size but cannot delete elements and is prone to clustering. Multiple hashing uses several hash functions for probing, making it less susceptible to clustering but increasing computational load.
+- Open addressing handles hash collisions through multiple probes. Linear probing uses a fixed step size, but it cannot delete elements directly and is prone to clustering. Multiple hashing uses several hash functions for probing, which reduces clustering compared to linear probing but increases computational overhead.
 - Different programming languages adopt various hash table implementations. For example, Java's `HashMap` uses chaining, while Python's `dict` employs open addressing.
 - In hash tables, we desire hash algorithms with determinism, high efficiency, and uniform distribution. In cryptography, hash algorithms should also possess collision resistance and the avalanche effect.
 - Hash algorithms typically use large prime numbers as moduli to ensure uniform distribution of hash values and reduce hash collisions.
@ -30,11 +30,11 @@ The time complexity of a hash table can degrade to $O(n)$ when hash collisions a

 Under the hash function $f(x) = x$, each element corresponds to a unique bucket index, which is equivalent to an array. However, the input space is usually much larger than the output space (array length), so the last step of a hash function is often to take the modulo of the array length. In other words, the goal of a hash table is to map a larger state space to a smaller one while providing $O(1)$ query efficiency.

-**Q**: Why can hash tables be more efficient than arrays, linked lists, or binary trees, even though they are implemented using these structures?
+**Q**: Why can hash tables be more efficient than arrays, linked lists, or binary trees, even though hash tables are implemented using these structures?

 Firstly, hash tables have higher time efficiency but lower space efficiency. A significant portion of memory in hash tables remains unused.

-Secondly, they are only more efficient in specific use cases. If a feature can be implemented with the same time complexity using an array or a linked list, it's usually faster than using a hash table. This is because the computation of the hash function incurs overhead, making the constant factor in the time complexity larger.
+Secondly, hash tables are only more time-efficient in specific use cases. If a feature can be implemented with the same time complexity using an array or a linked list, doing so is usually faster than using a hash table. This is because the computation of the hash function incurs overhead, making the constant factor in the time complexity larger.

 Lastly, the time complexity of hash tables can degrade. For example, in chaining, we perform search operations in a linked list or red-black tree, which still risks degrading to $O(n)$ time.