feat(compression): Add LZW and Arithmetic Coding algorithms (#6799)

* feat(compression): Add LZW and Arithmetic Coding algorithms

* test(compression): Improve test coverage for LZW and ArithmeticCoding

* style(compression): fix code style
This commit is contained in:
Indole Yi
2025-10-20 02:11:22 +08:00
committed by GitHub
parent a7f0bab021
commit 4a97258189
4 changed files with 551 additions and 0 deletions

View File

@@ -0,0 +1,154 @@
package com.thealgorithms.compression;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.math.BigDecimal;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link ArithmeticCoding}: input validation, compress/decompress
 * round trips, probability-table construction, and decoding against tables that
 * were not derived from the encoded message.
 */
class ArithmeticCodingTest {

    /**
     * Asserts that {@code value} is non-null and lies in the half-open interval [0, 1).
     *
     * @param value the encoded number returned by {@code ArithmeticCoding.compress}
     */
    private static void assertInUnitInterval(BigDecimal value) {
        assertNotNull(value);
        assertTrue(value.compareTo(BigDecimal.ZERO) >= 0);
        assertTrue(value.compareTo(BigDecimal.ONE) < 0);
    }

    /**
     * Compresses {@code original}, checks that the encoded number is in [0, 1), then
     * decodes it with the probability table built from the same text and checks that
     * the result equals {@code original}.
     *
     * @param original the non-empty text to send through the compress/decompress cycle
     */
    private static void assertRoundTrip(String original) {
        Map<Character, ArithmeticCoding.Symbol> probTable = ArithmeticCoding.calculateProbabilities(original);
        BigDecimal compressed = ArithmeticCoding.compress(original);
        assertInUnitInterval(compressed);

        String decompressed = ArithmeticCoding.decompress(compressed, original.length(), probTable);
        assertEquals(original, decompressed);
    }

    /** Null and empty inputs to {@code compress} must be rejected with IllegalArgumentException. */
    @Test
    void testThrowsExceptionForNullOrEmptyInput() {
        assertThrows(IllegalArgumentException.class, () -> ArithmeticCoding.compress(null));
        assertThrows(IllegalArgumentException.class, () -> ArithmeticCoding.compress(""));
    }

    /** Round trip of a short two-symbol message. */
    @Test
    void testCompressionAndDecompressionSimple() {
        assertRoundTrip("BABA");
    }

    /** Round trip of a longer message with many distinct symbols. */
    @Test
    void testSymmetryWithComplexString() {
        assertRoundTrip("THE_QUICK_BROWN_FOX_JUMPS_OVER_THE_LAZY_DOG");
    }

    /** Round trip of a message dominated by repeated symbols. */
    @Test
    void testSymmetryWithRepetitions() {
        assertRoundTrip("MISSISSIPPI");
    }

    /** Round trip of a message that contains a single distinct symbol. */
    @Test
    void testSingleCharacterString() {
        assertRoundTrip("AAAAA");
    }

    /**
     * Prints the encoded value for a small message so the "whole message becomes one
     * number" property can be inspected, and checks that the value is in [0, 1).
     */
    @Test
    void testCompressionOutputDemo() {
        String original = "BABA";
        BigDecimal compressed = ArithmeticCoding.compress(original);

        // Console output is intentional here: this test doubles as a demonstration.
        System.out.println("Original: " + original);
        System.out.println("Compressed to: " + compressed);
        System.out.println("Compression: " + original.length() + " characters -> 1 BigDecimal number");

        assertInUnitInterval(compressed);
    }

    /**
     * Every distinct character of the input must have an entry in the probability
     * table, and each entry must describe a non-empty sub-range of [0, 1].
     */
    @Test
    void testProbabilityTableCalculation() {
        String text = "AABBC";
        Map<Character, ArithmeticCoding.Symbol> probTable = ArithmeticCoding.calculateProbabilities(text);

        assertTrue(probTable.containsKey('A'));
        assertTrue(probTable.containsKey('B'));
        assertTrue(probTable.containsKey('C'));

        for (ArithmeticCoding.Symbol symbol : probTable.values()) {
            assertTrue(symbol.low().compareTo(BigDecimal.ZERO) >= 0);
            assertTrue(symbol.high().compareTo(BigDecimal.ONE) <= 0);
            // low < high: a symbol must own a range of non-zero width
            assertTrue(symbol.low().compareTo(symbol.high()) < 0);
        }
    }

    /**
     * Decoding with a probability table built from a different text must still return
     * a string of the requested length, but that string must not be the original.
     */
    @Test
    void testDecompressionWithMismatchedProbabilityTable() {
        String original = "ABCD";
        BigDecimal compressed = ArithmeticCoding.compress(original);

        // Table derived from "XYZ" rather than from the text that was actually encoded.
        Map<Character, ArithmeticCoding.Symbol> wrongProbTable = ArithmeticCoding.calculateProbabilities("XYZ");
        String decompressed = ArithmeticCoding.decompress(compressed, original.length(), wrongProbTable);

        assertNotNull(decompressed);
        assertEquals(original.length(), decompressed.length());
        // The documented expectation of this test: the wrong table yields wrong output.
        assertNotEquals(original, decompressed);
    }

    /**
     * Decodes a hand-picked value against a hand-built two-symbol table and checks
     * that a string of the requested length comes back.
     *
     * <p>NOTE(review): despite the method name, 0.25 lies inside 'A's range
     * [0.0, 0.5); no value outside every symbol range is exercised here.
     */
    @Test
    void testDecompressionWithValueOutsideSymbolRanges() {
        Map<Character, ArithmeticCoding.Symbol> probTable = new HashMap<>();
        probTable.put('A', new ArithmeticCoding.Symbol(new BigDecimal("0.0"), new BigDecimal("0.5")));
        probTable.put('B', new ArithmeticCoding.Symbol(new BigDecimal("0.5"), new BigDecimal("1.0")));

        BigDecimal compressed = new BigDecimal("0.25");
        String decompressed = ArithmeticCoding.decompress(compressed, 3, probTable);

        // Only completion and length are checked; the decoded content is not pinned.
        assertNotNull(decompressed);
        assertEquals(3, decompressed.length());
    }
}

View File

@@ -0,0 +1,104 @@
package com.thealgorithms.compression;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.junit.jupiter.api.Test;
/**
 * Tests for {@link LZW}: degenerate inputs, one fixed reference encoding,
 * compress/decompress round trips over several input shapes, and rejection of
 * code sequences that the decoder cannot resolve.
 */
class LZWTest {

    /**
     * Sends {@code text} through {@code LZW.compress} followed by {@code LZW.decompress}.
     *
     * @param text the input to encode
     * @return whatever the decoder reconstructs from the encoded form
     */
    private static String roundTrip(String text) {
        List<Integer> codes = LZW.compress(text);
        return LZW.decompress(codes);
    }

    /** Null and empty inputs map to the empty list (compress) or the empty string (decompress). */
    @Test
    void testNullAndEmptyInputs() {
        List<Integer> codesFromNull = LZW.compress(null);
        assertTrue(codesFromNull.isEmpty());

        String textFromNull = LZW.decompress(null);
        assertEquals("", textFromNull);

        List<Integer> codesFromEmpty = LZW.compress("");
        assertTrue(codesFromEmpty.isEmpty());

        String textFromEmpty = LZW.decompress(Collections.emptyList());
        assertEquals("", textFromEmpty);
    }

    /** A classic sample string must encode to a known code sequence and decode back to itself. */
    @Test
    void testCompressionAndDecompressionWithSimpleString() {
        String input = "TOBEORNOTTOBEORTOBEORNOT";
        List<Integer> expectedCodes = List.of(84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263);

        List<Integer> actualCodes = LZW.compress(input);

        // Pins the exact encoder output, not just reversibility.
        assertEquals(expectedCodes, actualCodes);
        assertEquals(input, LZW.decompress(actualCodes));
    }

    /** Long runs of one character must survive a round trip. */
    @Test
    void testCompressionWithRepeatedChars() {
        String input = "AAAAABBBBBAAAAA";
        assertEquals(input, roundTrip(input));
    }

    /** Input without any repeated character must survive a round trip. */
    @Test
    void testCompressionWithUniqueChars() {
        String input = "ABCDEFG";
        assertEquals(input, roundTrip(input));
    }

    /** decompress(compress(x)) == x for a long mixed string and for a short patterned one. */
    @Test
    void testSymmetry() {
        List<String> samples = List.of("THE_QUICK_BROWN_FOX_JUMPS_OVER_THE_LAZY_DOG", "ababcbababa");
        for (String sample : samples) {
            assertEquals(sample, roundTrip(sample));
        }
    }

    /** A code that is not in the dictionary must be rejected with a message naming that code. */
    @Test
    void testInvalidCompressedData() {
        // 65 is 'A' (valid); 999 has no dictionary entry.
        List<Integer> codes = new ArrayList<>(List.of(65, 999));

        IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class, () -> LZW.decompress(codes));

        String message = thrown.getMessage();
        assertTrue(message.contains("Bad compressed k: 999"));
    }

    /** A code far beyond the entries available so far must be rejected. */
    @Test
    void testDecompressionWithGapInDictionary() {
        // 84 is 'T' (valid); 500 skips well past anything defined after one symbol.
        List<Integer> codes = new ArrayList<>(List.of(84, 500));

        assertThrows(IllegalArgumentException.class, () -> LZW.decompress(codes));
    }
}