From 134b42c7ff3e838dcba0a85d386822d0dd8385fc Mon Sep 17 00:00:00 2001 From: Alex Klymenko Date: Thu, 15 Aug 2024 10:30:53 +0200 Subject: [PATCH] refactor: `BloomFilter` (#5325) --- .../bloomfilter/BloomFilter.java | 82 +++++++++++++++---- .../bloomfilter/BloomFilterTest.java | 44 +++++++++- 2 files changed, 106 insertions(+), 20 deletions(-) diff --git a/src/main/java/com/thealgorithms/datastructures/bloomfilter/BloomFilter.java b/src/main/java/com/thealgorithms/datastructures/bloomfilter/BloomFilter.java index a327690d7..33ea22c3d 100644 --- a/src/main/java/com/thealgorithms/datastructures/bloomfilter/BloomFilter.java +++ b/src/main/java/com/thealgorithms/datastructures/bloomfilter/BloomFilter.java @@ -2,35 +2,61 @@ package com.thealgorithms.datastructures.bloomfilter; import java.util.BitSet; +/** + * A generic BloomFilter implementation for probabilistic membership checking. + * + * @param <T> The type of elements to be stored in the Bloom filter. + */ public class BloomFilter<T> { - private int numberOfHashFunctions; - private BitSet bitArray; - private Hash<T>[] hashFunctions; + private final int numberOfHashFunctions; + private final BitSet bitArray; + private final Hash<T>[] hashFunctions; - public BloomFilter(int numberOfHashFunctions, int n) { + /** + * Constructs a BloomFilter with a specified number of hash functions and bit array size. + * + * @param numberOfHashFunctions the number of hash functions to use + * @param bitArraySize the size of the bit array + */ + @SuppressWarnings("unchecked") + public BloomFilter(int numberOfHashFunctions, int bitArraySize) { this.numberOfHashFunctions = numberOfHashFunctions; - hashFunctions = new Hash[numberOfHashFunctions]; - bitArray = new BitSet(n); - insertHash(); + this.bitArray = new BitSet(bitArraySize); + this.hashFunctions = new Hash[numberOfHashFunctions]; + initializeHashFunctions(); } - private void insertHash() { + /** + * Initializes the hash functions with unique indices.
+ */ + private void initializeHashFunctions() { for (int i = 0; i < numberOfHashFunctions; i++) { - hashFunctions[i] = new Hash(i); + hashFunctions[i] = new Hash<>(i); } } + /** + * Inserts an element into the Bloom filter. + * + * @param key the element to insert + */ public void insert(T key) { for (Hash<T> hash : hashFunctions) { - int position = hash.compute(key) % bitArray.size(); + int position = Math.abs(hash.compute(key) % bitArray.size()); bitArray.set(position); } } + /** + * Checks if an element might be in the Bloom filter. + * + * @param key the element to check + * @return {@code true} if the element might be in the Bloom filter, {@code false} if it is definitely not + */ public boolean contains(T key) { for (Hash<T> hash : hashFunctions) { - int position = hash.compute(key) % bitArray.size(); + int position = Math.abs(hash.compute(key) % bitArray.size()); if (!bitArray.get(position)) { return false; } @@ -38,24 +64,46 @@ public class BloomFilter<T> { return true; } - private class Hash<T> { + /** + * Inner class representing a hash function used by the Bloom filter. + * + * @param <T> The type of elements to be hashed. + */ + private static class Hash<T> { - int index; + private final int index; + /** + * Constructs a Hash function with a specified index. + * + * @param index the index of this hash function + */ Hash(int index) { this.index = index; } + /** + * Computes the hash of the given key. + * + * @param key the element to hash + * @return the hash value + */ public int compute(T key) { return index * asciiString(String.valueOf(key)); } + /** + * Computes the ASCII value sum of the characters in a string.
+ * + * @param word the string to compute + * @return the sum of ASCII values of the characters + */ private int asciiString(String word) { - int number = 0; - for (int i = 0; i < word.length(); i++) { - number += word.charAt(i); + int sum = 0; + for (char c : word.toCharArray()) { + sum += c; } - return number; + return sum; } } } diff --git a/src/test/java/com/thealgorithms/datastructures/bloomfilter/BloomFilterTest.java b/src/test/java/com/thealgorithms/datastructures/bloomfilter/BloomFilterTest.java index d6b137406..b19801a5a 100644 --- a/src/test/java/com/thealgorithms/datastructures/bloomfilter/BloomFilterTest.java +++ b/src/test/java/com/thealgorithms/datastructures/bloomfilter/BloomFilterTest.java @@ -1,12 +1,19 @@ package com.thealgorithms.datastructures.bloomfilter; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; public class BloomFilterTest { + private BloomFilter<String> bloomFilter; + + @BeforeEach + void setUp() { + bloomFilter = new BloomFilter<>(3, 100); + } @Test - public void test1() { + public void testIntegerContains() { BloomFilter<Integer> bloomFilter = new BloomFilter<>(3, 10); bloomFilter.insert(3); bloomFilter.insert(17); @@ -16,12 +23,43 @@ public class BloomFilterTest { } @Test - public void test2() { - BloomFilter<String> bloomFilter = new BloomFilter<>(4, 20); + public void testStringContains() { bloomFilter.insert("omar"); bloomFilter.insert("mahamid"); Assertions.assertTrue(bloomFilter.contains("omar")); Assertions.assertTrue(bloomFilter.contains("mahamid")); } + + @Test + void testInsertAndContains() { + bloomFilter.insert("hello"); + bloomFilter.insert("world"); + + Assertions.assertTrue(bloomFilter.contains("hello")); + Assertions.assertTrue(bloomFilter.contains("world")); + Assertions.assertFalse(bloomFilter.contains("java")); + } + + @Test + void testFalsePositive() { + bloomFilter.insert("apple"); + bloomFilter.insert("banana"); + +
Assertions.assertFalse(bloomFilter.contains("grape")); + Assertions.assertFalse(bloomFilter.contains("orange")); + } + + @Test + void testMultipleInsertions() { + for (int i = 0; i < 100; i++) { + bloomFilter.insert("key" + i); + } + + for (int i = 0; i < 100; i++) { + Assertions.assertTrue(bloomFilter.contains("key" + i)); + } + + Assertions.assertFalse(bloomFilter.contains("key" + 200)); + } }