Skip to content

Commit 490c540

Browse files
Merge branch 'master' into my-mergeaccounts-branch
2 parents 6b38e8e + d5289b9 commit 490c540

File tree

8 files changed

+912
-9
lines changed

8 files changed

+912
-9
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
package com.thealgorithms.compression;
2+
3+
import java.math.BigDecimal;
4+
import java.math.MathContext;
5+
import java.util.ArrayList;
6+
import java.util.Collections;
7+
import java.util.HashMap;
8+
import java.util.List;
9+
import java.util.Map;
10+
11+
/**
 * An implementation of the Arithmetic Coding algorithm.
 *
 * <p>
 * Arithmetic coding is a form of entropy encoding used in lossless data
 * compression. It encodes an entire message into a single number, a fraction
 * {@code n} where {@code 0.0 <= n < 1.0}. Unlike Huffman coding, which assigns
 * a specific bit sequence to each symbol, arithmetic coding represents the
 * message as a sub-interval of the {@code [0, 1)} interval.
 * </p>
 *
 * <p>
 * This implementation uses {@link BigDecimal} so that the shrinking intervals
 * are tracked without loss of precision, making it suitable for educational
 * purposes to demonstrate the core logic.
 * </p>
 *
 * <p>
 * Cost: compression performs one interval update per input character, and
 * decompression performs up to {@code n * m} interval tests, where {@code n} is
 * the message length and {@code m} the number of distinct symbols. Interval
 * bounds are computed with exact (unrounded) {@code BigDecimal} products, so
 * the number of digits per value grows with the message length and each
 * arithmetic operation becomes more expensive for longer inputs.
 * </p>
 *
 * <p>
 * References:
 * </p>
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Arithmetic_coding">Wikipedia:
 * Arithmetic coding</a></li>
 * </ul>
 */
public final class ArithmeticCoding {

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private ArithmeticCoding() {
    }

    /**
     * Compresses a string using the Arithmetic Coding algorithm.
     *
     * <p>
     * The probability table is derived from the input itself via
     * {@link #calculateProbabilities(String)}; the same table (and the original
     * length) is required to decompress the result.
     * </p>
     *
     * @param uncompressed The string to be compressed.
     * @return The compressed representation as a BigDecimal number: the lower
     *         bound of the final interval.
     * @throws IllegalArgumentException if the input string is null or empty.
     */
    public static BigDecimal compress(String uncompressed) {
        if (uncompressed == null || uncompressed.isEmpty()) {
            throw new IllegalArgumentException("Input string cannot be null or empty.");
        }

        Map<Character, Symbol> probabilityTable = calculateProbabilities(uncompressed);

        BigDecimal low = BigDecimal.ZERO;
        BigDecimal high = BigDecimal.ONE;

        for (char symbol : uncompressed.toCharArray()) {
            BigDecimal range = high.subtract(low);
            Symbol sym = probabilityTable.get(symbol);

            // Narrow [low, high) to the slice owned by this symbol. 'high' must be
            // computed first because both bounds are derived from the old 'low'.
            high = low.add(range.multiply(sym.high()));
            low = low.add(range.multiply(sym.low()));
        }

        return low; // Return the lower bound of the final interval
    }

    /**
     * Decompresses a BigDecimal number back into the original string.
     *
     * @param compressed       The compressed BigDecimal number.
     * @param length           The length of the original uncompressed string.
     * @param probabilityTable The probability table used during compression.
     * @return The original, uncompressed string; always exactly {@code length}
     *         characters long.
     * @throws IllegalArgumentException if {@code compressed} or
     *                                  {@code probabilityTable} is null, if
     *                                  {@code length} is negative, or if the
     *                                  compressed value does not fall inside any
     *                                  symbol interval (for example a value
     *                                  outside {@code [0, 1)} or an empty table).
     */
    public static String decompress(BigDecimal compressed, int length, Map<Character, Symbol> probabilityTable) {
        if (compressed == null) {
            throw new IllegalArgumentException("Compressed value cannot be null.");
        }
        if (probabilityTable == null) {
            throw new IllegalArgumentException("Probability table cannot be null.");
        }
        if (length < 0) {
            throw new IllegalArgumentException("Length cannot be negative: " + length);
        }

        // Create a sorted list of symbols for deterministic decompression, matching the
        // order used in calculateProbabilities
        List<Map.Entry<Character, Symbol>> sortedSymbols = new ArrayList<>(probabilityTable.entrySet());
        sortedSymbols.sort(Map.Entry.comparingByKey());

        StringBuilder decompressed = new StringBuilder();
        BigDecimal low = BigDecimal.ZERO;
        BigDecimal high = BigDecimal.ONE;

        for (int i = 0; i < length; i++) {
            BigDecimal range = high.subtract(low);
            boolean matched = false;

            // Find which symbol the compressed value falls into
            for (Map.Entry<Character, Symbol> entry : sortedSymbols) {
                Symbol sym = entry.getValue();

                // Calculate the actual range for this symbol in the current interval
                BigDecimal symLow = low.add(range.multiply(sym.low()));
                BigDecimal symHigh = low.add(range.multiply(sym.high()));

                // Check if the compressed value falls within this symbol's range
                if (compressed.compareTo(symLow) >= 0 && compressed.compareTo(symHigh) < 0) {
                    decompressed.append(entry.getKey());

                    // Update the interval for the next iteration
                    low = symLow;
                    high = symHigh;
                    matched = true;
                    break;
                }
            }

            // Without a match the interval can never narrow again, so every later
            // position would fail too; report it instead of returning a short string.
            if (!matched) {
                throw new IllegalArgumentException("Compressed value does not fall within any symbol interval at position " + i + ".");
            }
        }

        return decompressed.toString();
    }

    /**
     * Calculates the frequency and probability range for each character in the
     * input string in a deterministic order.
     *
     * <p>
     * Characters are processed in ascending order and receive consecutive,
     * non-overlapping ranges starting at 0. Each probability is rounded to
     * {@link MathContext#DECIMAL128}, so the upper bound of the last range may
     * fall marginally short of 1 when a probability has no finite decimal
     * expansion.
     * </p>
     *
     * @param text The input string.
     * @return A map from each character to a Symbol object containing its
     *         probability range; empty if {@code text} is empty.
     * @throws IllegalArgumentException if {@code text} is null.
     */
    public static Map<Character, Symbol> calculateProbabilities(String text) {
        if (text == null) {
            throw new IllegalArgumentException("Input string cannot be null.");
        }

        Map<Character, Integer> frequencies = new HashMap<>();
        for (char c : text.toCharArray()) {
            frequencies.merge(c, 1, Integer::sum);
        }

        // Sort the characters to ensure a deterministic order for the probability table
        List<Character> sortedKeys = new ArrayList<>(frequencies.keySet());
        Collections.sort(sortedKeys);

        Map<Character, Symbol> probabilityTable = new HashMap<>();
        BigDecimal currentLow = BigDecimal.ZERO;
        BigDecimal total = BigDecimal.valueOf(text.length());

        for (char symbol : sortedKeys) {
            BigDecimal probability = BigDecimal.valueOf(frequencies.get(symbol)).divide(total, MathContext.DECIMAL128);
            BigDecimal high = currentLow.add(probability);
            probabilityTable.put(symbol, new Symbol(currentLow, high));
            // The next symbol's range starts where this one ends.
            currentLow = high;
        }

        return probabilityTable;
    }

    /**
     * Helper class to store the probability range [low, high) for a symbol.
     *
     * @param low  The inclusive lower bound of the range.
     * @param high The exclusive upper bound of the range.
     */
    public record Symbol(BigDecimal low, BigDecimal high) {
    }
}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
package com.thealgorithms.compression;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashMap;
5+
import java.util.List;
6+
import java.util.Map;
7+
8+
/**
 * An implementation of the Lempel-Ziv-Welch (LZW) algorithm.
 *
 * <p>
 * LZW is a universal lossless data compression algorithm created by Abraham
 * Lempel, Jacob Ziv, and Terry Welch. It works by building a dictionary of
 * strings encountered during compression and replacing occurrences of those
 * strings with a shorter code.
 * </p>
 *
 * <p>
 * The dictionary is seeded with the 256 single-character strings whose
 * character values are 0-255, so only input made of such characters can be
 * compressed. Codes from 256 upwards are assigned to longer sequences in the
 * order they are first seen.
 * </p>
 * <ul>
 * <li>Compressing "TOBEORNOTTOBEORTOBEORNOT" results in a list of integer
 * codes.</li>
 * <li>Decompressing that list of codes results back in the original
 * string.</li>
 * </ul>
 *
 * <p>
 * References:
 * </p>
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch">Wikipedia:
 * Lempel–Ziv–Welch</a></li>
 * </ul>
 */
public final class LZW {

    /** Number of single-character entries (character values 0-255) seeding the dictionary. */
    private static final int INITIAL_DICTIONARY_SIZE = 256;

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private LZW() {
    }

    /**
     * Compresses a string using the LZW algorithm.
     *
     * @param uncompressed The string to be compressed. Can be null.
     * @return A list of integers representing the compressed data. Returns an empty
     *         list if the input is null or empty.
     * @throws IllegalArgumentException if the input contains a character whose
     *                                  value is greater than 255, which has no
     *                                  entry in the initial dictionary.
     */
    public static List<Integer> compress(String uncompressed) {
        if (uncompressed == null || uncompressed.isEmpty()) {
            return new ArrayList<>();
        }

        // Initialize dictionary with single characters (0-255)
        Map<String, Integer> dictionary = new HashMap<>();
        for (int i = 0; i < INITIAL_DICTIONARY_SIZE; i++) {
            dictionary.put(String.valueOf((char) i), i);
        }
        int dictSize = INITIAL_DICTIONARY_SIZE;

        String w = "";
        List<Integer> result = new ArrayList<>();
        for (char c : uncompressed.toCharArray()) {
            // A character outside the seeded range has no code; emitting it would
            // put a null into the output, so reject it up front.
            if (c >= INITIAL_DICTIONARY_SIZE) {
                throw new IllegalArgumentException("Unsupported character with value " + (int) c + "; only values 0-255 are supported.");
            }

            String wc = w + c;
            if (dictionary.containsKey(wc)) {
                // If the new string is in the dictionary, extend the current string
                w = wc;
            } else {
                // Otherwise, output the code for the current string
                result.add(dictionary.get(w));
                // Add the new string to the dictionary
                dictionary.put(wc, dictSize++);
                // Start a new current string
                w = String.valueOf(c);
            }
        }

        // Output the code for the last remaining string
        result.add(dictionary.get(w));
        return result;
    }

    /**
     * Decompresses a list of integers back into a string using the LZW algorithm.
     *
     * <p>
     * The supplied list is only read; it is never modified, so immutable lists
     * are accepted.
     * </p>
     *
     * @param compressed A list of integers representing the compressed data. Can be
     *                   null.
     * @return The original, uncompressed string. Returns an empty string if the
     *         input is null or empty.
     * @throws IllegalArgumentException if the list contains a null element or a
     *                                  code that cannot occur at its position
     *                                  (including a first code outside 0-255).
     */
    public static String decompress(List<Integer> compressed) {
        if (compressed == null || compressed.isEmpty()) {
            return "";
        }

        // Codes are dense (0, 1, 2, ...), so a list indexed by code serves as the
        // dictionary and its size is always the next code to be assigned.
        List<String> dictionary = new ArrayList<>();
        for (int i = 0; i < INITIAL_DICTIONARY_SIZE; i++) {
            dictionary.add(String.valueOf((char) i));
        }

        StringBuilder result = new StringBuilder();
        String w = null; // previously decoded entry; null until the first code is read
        for (Integer code : compressed) {
            if (code == null) {
                throw new IllegalArgumentException("Bad compressed k: null");
            }
            int k = code;

            String entry;
            if (k >= 0 && k < dictionary.size()) {
                // The code is in the dictionary
                entry = dictionary.get(k);
            } else if (w != null && k == dictionary.size()) {
                // Special case for sequences like "ababab": the code refers to the
                // entry that is about to be created from the previous one.
                entry = w + w.charAt(0);
            } else {
                throw new IllegalArgumentException("Bad compressed k: " + k);
            }

            result.append(entry);

            // Add new sequence to the dictionary (nothing to add for the first code)
            if (w != null) {
                dictionary.add(w + entry.charAt(0));
            }

            w = entry;
        }
        return result.toString();
    }
}

0 commit comments

Comments
 (0)