mirror of
https://github.com/TheAlgorithms/Java.git
synced 2025-07-08 18:32:56 +08:00
Enhance docs, add more tests in BloomFilter
(#5948)
This commit is contained in:
@ -4,6 +4,11 @@ import java.util.BitSet;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* A generic BloomFilter implementation for probabilistic membership checking.
|
* A generic BloomFilter implementation for probabilistic membership checking.
|
||||||
|
* <p>
|
||||||
|
* Bloom filters are space-efficient data structures that provide a fast way to test whether an
|
||||||
|
* element is a member of a set. They may produce false positives, indicating an element is
|
||||||
|
* in the set when it is not, but they will never produce false negatives.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @param <T> The type of elements to be stored in the Bloom filter.
|
* @param <T> The type of elements to be stored in the Bloom filter.
|
||||||
*/
|
*/
|
||||||
@ -17,10 +22,14 @@ public class BloomFilter<T> {
|
|||||||
* Constructs a BloomFilter with a specified number of hash functions and bit array size.
|
* Constructs a BloomFilter with a specified number of hash functions and bit array size.
|
||||||
*
|
*
|
||||||
* @param numberOfHashFunctions the number of hash functions to use
|
* @param numberOfHashFunctions the number of hash functions to use
|
||||||
* @param bitArraySize the size of the bit array
|
* @param bitArraySize the size of the bit array, which determines the capacity of the filter
|
||||||
|
* @throws IllegalArgumentException if numberOfHashFunctions or bitArraySize is less than 1
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public BloomFilter(int numberOfHashFunctions, int bitArraySize) {
|
public BloomFilter(int numberOfHashFunctions, int bitArraySize) {
|
||||||
|
if (numberOfHashFunctions < 1 || bitArraySize < 1) {
|
||||||
|
throw new IllegalArgumentException("Number of hash functions and bit array size must be greater than 0");
|
||||||
|
}
|
||||||
this.numberOfHashFunctions = numberOfHashFunctions;
|
this.numberOfHashFunctions = numberOfHashFunctions;
|
||||||
this.bitArray = new BitSet(bitArraySize);
|
this.bitArray = new BitSet(bitArraySize);
|
||||||
this.hashFunctions = new Hash[numberOfHashFunctions];
|
this.hashFunctions = new Hash[numberOfHashFunctions];
|
||||||
@ -28,7 +37,7 @@ public class BloomFilter<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the hash functions with unique indices.
|
* Initializes the hash functions with unique indices to ensure different hashing.
|
||||||
*/
|
*/
|
||||||
private void initializeHashFunctions() {
|
private void initializeHashFunctions() {
|
||||||
for (int i = 0; i < numberOfHashFunctions; i++) {
|
for (int i = 0; i < numberOfHashFunctions; i++) {
|
||||||
@ -38,8 +47,12 @@ public class BloomFilter<T> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Inserts an element into the Bloom filter.
|
* Inserts an element into the Bloom filter.
|
||||||
|
* <p>
|
||||||
|
* This method hashes the element using all defined hash functions and sets the corresponding
|
||||||
|
* bits in the bit array.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @param key the element to insert
|
* @param key the element to insert into the Bloom filter
|
||||||
*/
|
*/
|
||||||
public void insert(T key) {
|
public void insert(T key) {
|
||||||
for (Hash<T> hash : hashFunctions) {
|
for (Hash<T> hash : hashFunctions) {
|
||||||
@ -50,8 +63,13 @@ public class BloomFilter<T> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if an element might be in the Bloom filter.
|
* Checks if an element might be in the Bloom filter.
|
||||||
|
* <p>
|
||||||
|
* This method checks the bits at the positions computed by each hash function. If any of these
|
||||||
|
* bits are not set, the element is definitely not in the filter. If all bits are set, the element
|
||||||
|
* might be in the filter.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @param key the element to check
|
* @param key the element to check for membership in the Bloom filter
|
||||||
* @return {@code true} if the element might be in the Bloom filter, {@code false} if it is definitely not
|
* @return {@code true} if the element might be in the Bloom filter, {@code false} if it is definitely not
|
||||||
*/
|
*/
|
||||||
public boolean contains(T key) {
|
public boolean contains(T key) {
|
||||||
@ -66,6 +84,9 @@ public class BloomFilter<T> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Inner class representing a hash function used by the Bloom filter.
|
* Inner class representing a hash function used by the Bloom filter.
|
||||||
|
* <p>
|
||||||
|
* Each instance of this class represents a different hash function based on its index.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @param <T> The type of elements to be hashed.
|
* @param <T> The type of elements to be hashed.
|
||||||
*/
|
*/
|
||||||
@ -76,7 +97,7 @@ public class BloomFilter<T> {
|
|||||||
/**
|
/**
|
||||||
* Constructs a Hash function with a specified index.
|
* Constructs a Hash function with a specified index.
|
||||||
*
|
*
|
||||||
* @param index the index of this hash function
|
* @param index the index of this hash function, used to create a unique hash
|
||||||
*/
|
*/
|
||||||
Hash(int index) {
|
Hash(int index) {
|
||||||
this.index = index;
|
this.index = index;
|
||||||
@ -84,9 +105,13 @@ public class BloomFilter<T> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the hash of the given key.
|
* Computes the hash of the given key.
|
||||||
|
* <p>
|
||||||
|
* The hash value is calculated by multiplying the index of the hash function
|
||||||
|
* with the ASCII sum of the string representation of the key.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @param key the element to hash
|
* @param key the element to hash
|
||||||
* @return the hash value
|
* @return the computed hash value
|
||||||
*/
|
*/
|
||||||
public int compute(T key) {
|
public int compute(T key) {
|
||||||
return index * asciiString(String.valueOf(key));
|
return index * asciiString(String.valueOf(key));
|
||||||
@ -94,9 +119,13 @@ public class BloomFilter<T> {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the ASCII value sum of the characters in a string.
|
* Computes the ASCII value sum of the characters in a string.
|
||||||
|
* <p>
|
||||||
|
* This method iterates through each character of the string and accumulates
|
||||||
|
* their ASCII values to produce a single integer value.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @param word the string to compute
|
* @param word the string to compute
|
||||||
* @return the sum of ASCII values of the characters
|
* @return the sum of ASCII values of the characters in the string
|
||||||
*/
|
*/
|
||||||
private int asciiString(String word) {
|
private int asciiString(String word) {
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
|
@ -62,4 +62,55 @@ public class BloomFilterTest {
|
|||||||
|
|
||||||
Assertions.assertFalse(bloomFilter.contains("key" + 200));
|
Assertions.assertFalse(bloomFilter.contains("key" + 200));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testEmptyFilterContains() {
|
||||||
|
Assertions.assertFalse(bloomFilter.contains("notInserted"), "Filter should not contain any elements when empty");
|
||||||
|
Assertions.assertFalse(bloomFilter.contains(null), "Filter should not contain null elements");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testDifferentTypes() {
|
||||||
|
BloomFilter<Object> filter = new BloomFilter<>(3, 100);
|
||||||
|
filter.insert("string");
|
||||||
|
filter.insert(123);
|
||||||
|
filter.insert(45.67);
|
||||||
|
|
||||||
|
Assertions.assertTrue(filter.contains("string"), "Filter should contain the string 'string'");
|
||||||
|
Assertions.assertTrue(filter.contains(123), "Filter should contain the integer 123");
|
||||||
|
Assertions.assertTrue(filter.contains(45.67), "Filter should contain the double 45.67");
|
||||||
|
Assertions.assertFalse(filter.contains("missing"), "Filter should not contain elements that were not inserted");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testFalsePositiveAfterInsertions() {
|
||||||
|
bloomFilter.insert("cat");
|
||||||
|
bloomFilter.insert("dog");
|
||||||
|
bloomFilter.insert("fish");
|
||||||
|
|
||||||
|
// Checking for an element that was not added
|
||||||
|
Assertions.assertFalse(bloomFilter.contains("bird"), "Filter should not contain 'bird' which was never inserted");
|
||||||
|
|
||||||
|
// To increase chances of false positives, we can add more items
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
bloomFilter.insert("item" + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
Assertions.assertFalse(bloomFilter.contains("nonexistent"), "Filter should not contain 'nonexistent' which was never inserted");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testBoundaryConditions() {
|
||||||
|
BloomFilter<String> filter = new BloomFilter<>(3, 10);
|
||||||
|
filter.insert("a");
|
||||||
|
filter.insert("b");
|
||||||
|
filter.insert("c");
|
||||||
|
filter.insert("d");
|
||||||
|
|
||||||
|
Assertions.assertTrue(filter.contains("a"), "Filter should contain 'a'");
|
||||||
|
Assertions.assertTrue(filter.contains("b"), "Filter should contain 'b'");
|
||||||
|
Assertions.assertTrue(filter.contains("c"), "Filter should contain 'c'");
|
||||||
|
Assertions.assertTrue(filter.contains("d"), "Filter should contain 'd'");
|
||||||
|
Assertions.assertFalse(filter.contains("e"), "Filter should not contain 'e' which was not inserted");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user