mirror of
https://github.com/TheAlgorithms/Java.git
synced 2025-12-19 07:00:35 +08:00
feat(compression): Add LZW and Arithmetic Coding algorithms (#6799)
* feat(compression): Add LZW and Arithmetic Coding algorithms * test(compression): Improve test coverage for LZW and ArithmeticCoding * style(compression): fix code style
This commit is contained in:
@@ -0,0 +1,157 @@
|
||||
package com.thealgorithms.compression;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.MathContext;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * An implementation of the Arithmetic Coding algorithm.
 *
 * <p>
 * Arithmetic coding is a form of entropy encoding used in lossless data
 * compression. It encodes an entire message into a single number, a fraction n
 * where {@code 0.0 <= n < 1.0}. Unlike Huffman coding, which assigns a specific
 * bit sequence to each symbol, arithmetic coding represents the message as a
 * sub-interval of the {@code [0, 1)} interval.
 * </p>
 *
 * <p>
 * This implementation uses {@link BigDecimal} for precision to handle the
 * shrinking intervals, making it suitable for educational purposes to
 * demonstrate the core logic. The interval arithmetic itself is exact; only the
 * per-symbol probabilities are rounded (to {@link MathContext#DECIMAL128}).
 * </p>
 *
 * <p>
 * Time Complexity: building the probability table takes one pass over the input
 * plus a sort of the m unique symbols. Compression then performs one table
 * lookup per input character, while decompression tests up to m symbol ranges
 * per decoded character, i.e. O(n*m) interval comparisons for an input of
 * length n.
 * </p>
 *
 * <p>
 * References:
 * </p>
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Arithmetic_coding">Wikipedia:
 * Arithmetic coding</a></li>
 * </ul>
 */
public final class ArithmeticCoding {

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private ArithmeticCoding() {
    }

    /**
     * Compresses a string using the Arithmetic Coding algorithm.
     *
     * <p>
     * The probability table is derived from the input itself via
     * {@link #calculateProbabilities(String)}; the same table (and the input
     * length) must be supplied to
     * {@link #decompress(BigDecimal, int, Map)} to recover the text.
     * </p>
     *
     * @param uncompressed The string to be compressed.
     * @return The compressed representation as a BigDecimal number (the lower
     *         bound of the final interval).
     * @throws IllegalArgumentException if the input string is null or empty.
     */
    public static BigDecimal compress(String uncompressed) {
        if (uncompressed == null || uncompressed.isEmpty()) {
            throw new IllegalArgumentException("Input string cannot be null or empty.");
        }

        Map<Character, Symbol> probabilityTable = calculateProbabilities(uncompressed);

        BigDecimal low = BigDecimal.ZERO;
        BigDecimal high = BigDecimal.ONE;

        for (int i = 0; i < uncompressed.length(); i++) {
            Symbol sym = probabilityTable.get(uncompressed.charAt(i));
            BigDecimal range = high.subtract(low);

            // Narrow [low, high) to the slice owned by this symbol. 'high' must be
            // updated first because both new bounds are offsets from the old 'low'.
            high = low.add(range.multiply(sym.high()));
            low = low.add(range.multiply(sym.low()));
        }

        return low; // Return the lower bound of the final interval
    }

    /**
     * Decompresses a BigDecimal number back into the original string.
     *
     * <p>
     * At every step the current interval is split according to the probability
     * table and the symbol whose sub-interval contains {@code compressed} is
     * emitted. If no symbol range contains the value at some step (for example
     * because the table does not belong to the compressed value), nothing is
     * appended for that step, so the result can be shorter than {@code length}.
     * </p>
     *
     * @param compressed       The compressed BigDecimal number.
     * @param length           The length of the original uncompressed string. A
     *                         value of zero or less yields an empty string.
     * @param probabilityTable The probability table used during compression.
     * @return The original, uncompressed string.
     */
    public static String decompress(BigDecimal compressed, int length, Map<Character, Symbol> probabilityTable) {
        StringBuilder decompressed = new StringBuilder();

        // Walk the symbols in key order so decoding is deterministic and matches the
        // order used in calculateProbabilities.
        List<Map.Entry<Character, Symbol>> sortedSymbols = new ArrayList<>(probabilityTable.entrySet());
        sortedSymbols.sort(Map.Entry.comparingByKey());

        BigDecimal low = BigDecimal.ZERO;
        BigDecimal high = BigDecimal.ONE;

        for (int i = 0; i < length; i++) {
            BigDecimal range = high.subtract(low);

            // Find which symbol the compressed value falls into
            for (Map.Entry<Character, Symbol> entry : sortedSymbols) {
                Symbol sym = entry.getValue();

                // Scale the symbol's [low, high) range into the current interval
                BigDecimal symLow = low.add(range.multiply(sym.low()));
                BigDecimal symHigh = low.add(range.multiply(sym.high()));

                boolean inRange = compressed.compareTo(symLow) >= 0 && compressed.compareTo(symHigh) < 0;
                if (inRange) {
                    decompressed.append(entry.getKey());

                    // Continue decoding inside this symbol's sub-interval
                    low = symLow;
                    high = symHigh;
                    break;
                }
            }
        }

        return decompressed.toString();
    }

    /**
     * Calculates the frequency and probability range for each character in the
     * input string. Ranges are assigned in ascending character order, so the same
     * text always produces the same table.
     *
     * <p>
     * Each probability is {@code frequency / text.length()} rounded to
     * {@link MathContext#DECIMAL128}; a symbol's range starts where the previous
     * symbol's range ends. An empty string yields an empty table.
     * </p>
     *
     * @param text The input string.
     * @return A map from each character to a Symbol object containing its
     *         probability range.
     */
    public static Map<Character, Symbol> calculateProbabilities(String text) {
        Map<Character, Integer> frequencies = new HashMap<>();
        for (int i = 0; i < text.length(); i++) {
            frequencies.merge(text.charAt(i), 1, Integer::sum);
        }

        // Sort the characters to ensure a deterministic order for the probability table
        List<Character> sortedKeys = new ArrayList<>(frequencies.keySet());
        Collections.sort(sortedKeys);

        Map<Character, Symbol> probabilityTable = new HashMap<>();
        BigDecimal total = BigDecimal.valueOf(text.length());
        BigDecimal currentLow = BigDecimal.ZERO;

        for (Character symbol : sortedKeys) {
            BigDecimal probability = BigDecimal.valueOf(frequencies.get(symbol)).divide(total, MathContext.DECIMAL128);
            BigDecimal high = currentLow.add(probability);
            probabilityTable.put(symbol, new Symbol(currentLow, high));
            currentLow = high;
        }

        return probabilityTable;
    }

    /**
     * Helper class to store the probability range [low, high) for a symbol.
     *
     * @param low  The inclusive lower bound of the range.
     * @param high The exclusive upper bound of the range.
     */
    public record Symbol(BigDecimal low, BigDecimal high) {
    }
}
|
||||
136
src/main/java/com/thealgorithms/compression/LZW.java
Normal file
136
src/main/java/com/thealgorithms/compression/LZW.java
Normal file
@@ -0,0 +1,136 @@
|
||||
package com.thealgorithms.compression;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * An implementation of the Lempel-Ziv-Welch (LZW) algorithm.
 *
 * <p>
 * LZW is a universal lossless data compression algorithm created by Abraham
 * Lempel, Jacob Ziv, and Terry Welch. It works by building a dictionary of
 * strings encountered during compression and replacing occurrences of those
 * strings with a shorter code.
 * </p>
 *
 * <p>
 * This implementation seeds its dictionary with the 256 single characters
 * {@code 0-255} (ASCII and its 8-bit extension) and provides methods for both
 * compression and decompression.
 * </p>
 * <ul>
 * <li>Compressing "TOBEORNOTTOBEORTOBEORNOT" results in a list of integer
 * codes.</li>
 * <li>Decompressing that list of codes results back in the original
 * string.</li>
 * </ul>
 *
 * <p>
 * Time Complexity: O(n) dictionary operations for both compression and
 * decompression, where n is the length of the input.
 * </p>
 *
 * <p>
 * References:
 * </p>
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch">Wikipedia:
 * Lempel–Ziv–Welch</a></li>
 * </ul>
 */
public final class LZW {

    /**
     * Number of single-character entries (codes 0-255) that seed the dictionary
     * for both compression and decompression.
     */
    private static final int INITIAL_DICTIONARY_SIZE = 256;

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private LZW() {
    }

    /**
     * Compresses a string using the LZW algorithm.
     *
     * @param uncompressed The string to be compressed. Can be null.
     * @return A list of integers representing the compressed data. Returns an empty
     *         list if the input is null or empty.
     * @throws IllegalArgumentException if the input contains a character outside
     *                                  the range 0-255, which the initial
     *                                  dictionary cannot represent.
     */
    public static List<Integer> compress(String uncompressed) {
        if (uncompressed == null || uncompressed.isEmpty()) {
            return new ArrayList<>();
        }

        // Initialize dictionary with single characters (0-255)
        int dictSize = INITIAL_DICTIONARY_SIZE;
        Map<String, Integer> dictionary = new HashMap<>();
        for (int i = 0; i < dictSize; i++) {
            dictionary.put(String.valueOf((char) i), i);
        }

        String w = "";
        List<Integer> result = new ArrayList<>();
        for (int i = 0; i < uncompressed.length(); i++) {
            char c = uncompressed.charAt(i);
            if (c >= INITIAL_DICTIONARY_SIZE) {
                // Such a character has no single-character code, so the encoder would
                // otherwise emit a null code that can never be decompressed.
                throw new IllegalArgumentException("Unsupported character with code " + (int) c + " at index " + i + "; only characters in the range 0-255 can be compressed.");
            }

            String wc = w + c;
            if (dictionary.containsKey(wc)) {
                // If the new string is in the dictionary, extend the current string
                w = wc;
            } else {
                // Otherwise, output the code for the current string
                result.add(dictionary.get(w));
                // Add the new string to the dictionary
                dictionary.put(wc, dictSize++);
                // Start a new current string
                w = String.valueOf(c);
            }
        }

        // Output the code for the last remaining string
        result.add(dictionary.get(w));
        return result;
    }

    /**
     * Decompresses a list of integers back into a string using the LZW algorithm.
     * The supplied list is only read, never modified, so immutable lists are
     * accepted.
     *
     * @param compressed A list of integers representing the compressed data. Can be
     *                   null.
     * @return The original, uncompressed string. Returns an empty string if the
     *         input is null or empty.
     * @throws IllegalArgumentException if a code cannot be resolved, i.e. the first
     *                                  code is not a single-character code, or a
     *                                  later code is neither in the dictionary nor
     *                                  the next code about to be defined.
     */
    public static String decompress(List<Integer> compressed) {
        if (compressed == null || compressed.isEmpty()) {
            return "";
        }

        // Initialize dictionary with single characters (0-255)
        int dictSize = INITIAL_DICTIONARY_SIZE;
        Map<Integer, String> dictionary = new HashMap<>();
        for (int i = 0; i < dictSize; i++) {
            dictionary.put(i, String.valueOf((char) i));
        }

        StringBuilder result = new StringBuilder();
        String w = null; // previously decoded entry; null until the first code is read

        for (Integer k : compressed) {
            String entry = dictionary.get(k);
            if (entry == null) {
                if (w != null && k != null && k == dictSize) {
                    // Special case for sequences like "ababab": the code refers to the
                    // entry that is about to be created from the previous one.
                    entry = w + w.charAt(0);
                } else {
                    throw new IllegalArgumentException("Bad compressed k: " + k);
                }
            }

            result.append(entry);

            // Add new sequence to the dictionary (nothing to add for the first code)
            if (w != null) {
                dictionary.put(dictSize++, w + entry.charAt(0));
            }

            w = entry;
        }
        return result.toString();
    }
}
|
||||
Reference in New Issue
Block a user