From 236f24c9529487aa72ce889f513c4755aff81d39 Mon Sep 17 00:00:00 2001 From: Ayush Bhardwaj <116327330+Ayush7970@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:53:22 -0600 Subject: [PATCH] adding huffman encoding project --- huffman.py/huffman_encoding/README.md | 38 +++++++ .../huffman_encoding/huffman_encode_decode.py | 106 ++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 huffman.py/huffman_encoding/README.md create mode 100644 huffman.py/huffman_encoding/huffman_encode_decode.py diff --git a/huffman.py/huffman_encoding/README.md b/huffman.py/huffman_encoding/README.md new file mode 100644 index 00000000..11d83f52 --- /dev/null +++ b/huffman.py/huffman_encoding/README.md @@ -0,0 +1,38 @@ +# Huffman Coding in Python + +## Overview +This repository contains a Python implementation of the Huffman coding algorithm for text compression and decompression. The implementation is designed to help beginners understand the concept of data encoding using binary trees. + +## Author +- Ayush Bhardwaj + +## Features +- Build a frequency map of characters from the input text. +- Construct a Huffman Tree based on character frequencies. +- Generate Huffman codes for each character. +- Encode and decode text using the generated Huffman codes. + +## Setup +To run this project, you'll need Python installed on your machine. You can download Python from [here](https://www.python.org/downloads/). + +1. Clone this repository or download the files to your local machine. +2. Navigate to the directory containing the project files in your terminal. +3. Run the script using Python: + +## How It Works +The `huffman_coding.py` script performs several key functions: +- **Frequency Map Building:** Counts the frequency of each character in the provided text. +- **Huffman Tree Construction:** Uses a priority queue to build a tree where each node represents a character and its frequency. Nodes with the lowest frequency are combined and placed back into the queue until only one node remains, representing the root of the tree. +- **Encoding Map Construction:** Traverses the Huffman Tree to assign a unique binary code to each character, where the path to a character determines its code. +- **Text Encoding and Decoding:** Converts the original text into a string of binary codes and then back into the original text using the Huffman Tree. + +## Contributing +Contributions to this project are welcome! You can contribute by: +- Improving the efficiency of the existing implementation. +- Adding new features, such as file input/output handling to work with different file types. +- Refactoring the code for clarity and maintainability. + +## License +This project is open source and available under the [MIT License](LICENSE). + +Enjoy exploring and enhancing the Huffman coding project! diff --git a/huffman.py/huffman_encoding/huffman_encode_decode.py b/huffman.py/huffman_encoding/huffman_encode_decode.py new file mode 100644 index 00000000..112a1088 --- /dev/null +++ b/huffman.py/huffman_encoding/huffman_encode_decode.py @@ -0,0 +1,106 @@ +import heapq + +class HuffmanNode: + def __init__(self, char, freq): + self.char = char + self.freq = freq + self.left = None + self.right = None + + # Comparison function for priority queue to ensure the node with the lower frequency comes first + def __lt__(self, other): + return self.freq < other.freq + +def build_frequency_map(text): + frequency = {} + for char in text: + frequency[char] = 1 + frequency.get(char, 0) + return frequency + +def build_huffman_tree(frequency_map): + # Create a priority queue from the frequency map + priority_queue = [HuffmanNode(char, freq) for char, freq in frequency_map.items()] + heapq.heapify(priority_queue) + + # Combine nodes until the entire tree is built + while len(priority_queue) > 1: + left = heapq.heappop(priority_queue) + right = heapq.heappop(priority_queue) + + # Create a new merged node and push it back into the priority queue + merged = HuffmanNode(None, left.freq + right.freq) + merged.left = left + merged.right = right + heapq.heappush(priority_queue, merged) + + return priority_queue[0] + +def build_encoding_map(root, path="", encoding_map=None): + if encoding_map is None: + encoding_map = {} + + # Assign a binary code to each character + if root.char is not None: + encoding_map[root.char] = path + else: + build_encoding_map(root.left, path + "0", encoding_map) + build_encoding_map(root.right, path + "1", encoding_map) + + return encoding_map + +def encode_text(text, encoding_map): + encoded_output = "" + for char in text: + encoded_output += encoding_map[char] + return encoded_output + +def decode_text(encoded_text, root): + current_node = root + decoded_output = "" + + # Decode the text by navigating the Huffman tree + for bit in encoded_text: + if bit == '0': + current_node = current_node.left + else: + current_node = current_node.right + + if current_node.char is not None: + decoded_output += current_node.char + current_node = root + + return decoded_output + +def check_result(s1, s2): + print("Test case Passed" if s1 == s2 else "Test case Failed!!!!!") + +def read_file_to_string(filepath): + try: + with open(filepath, 'r', encoding='utf-8') as file: + return file.read() + except FileNotFoundError: + print(f"Error: The file at {filepath} does not exist.") + return None + except Exception as e: + print(f"An error occurred: {e}") + return None + +# Example usage +text = "this is an example of how Huffman encoding works" + +# Generate frequency map +frequency_map = build_frequency_map(text) +# Build Huffman tree +huffman_tree = build_huffman_tree(frequency_map) +# Generate encoding map +encoding_map = build_encoding_map(huffman_tree) + +# Encode and decode the text +encoded_text = encode_text(text, encoding_map) +decoded_text = decode_text(encoded_text, huffman_tree) + +# Output results +print("Original:", text) +print("Encoded:", encoded_text) +print("Decoded:", decoded_text) +check_result(text, decoded_text)