From 236f24c9529487aa72ce889f513c4755aff81d39 Mon Sep 17 00:00:00 2001
From: Ayush Bhardwaj <116327330+Ayush7970@users.noreply.github.com>
Date: Mon, 27 Jan 2025 12:53:22 -0600
Subject: [PATCH] adding huffman encoding project

---
 huffman.py/huffman_encoding/README.md         |  38 +++++++
 .../huffman_encoding/huffman_encode_decode.py | 106 ++++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100644 huffman.py/huffman_encoding/README.md
 create mode 100644 huffman.py/huffman_encoding/huffman_encode_decode.py

diff --git a/huffman.py/huffman_encoding/README.md b/huffman.py/huffman_encoding/README.md
new file mode 100644
index 00000000..11d83f52
--- /dev/null
+++ b/huffman.py/huffman_encoding/README.md
@@ -0,0 +1,38 @@
+# Huffman Coding in Python
+
+## Overview
+This repository contains a Python implementation of the Huffman coding algorithm for text compression and decompression. The implementation is designed to help beginners understand the concept of data encoding using binary trees.
+
+## Author
+- Ayush Bhardwaj
+
+## Features
+- Build a frequency map of characters from the input text.
+- Construct a Huffman Tree based on character frequencies.
+- Generate Huffman codes for each character.
+- Encode and decode text using the generated Huffman codes.
+
+## Setup
+To run this project, you'll need Python installed on your machine. You can download Python from [here](https://www.python.org/downloads/).
+
+1. Clone this repository or download the files to your local machine.
+2. Navigate to the directory containing the project files in your terminal.
+3. Run the script using Python: `python huffman_encode_decode.py`
+
+## How It Works
+The `huffman_encode_decode.py` script performs several key functions:
+- **Frequency Map Building:** Counts the frequency of each character in the provided text.
+- **Huffman Tree Construction:** Uses a priority queue to build a tree where each node represents a character and its frequency. Nodes with the lowest frequency are combined and placed back into the queue until only one node remains, representing the root of the tree.
+- **Encoding Map Construction:** Traverses the Huffman Tree to assign a unique binary code to each character, where the path to a character determines its code.
+- **Text Encoding and Decoding:** Converts the original text into a string of binary codes and then back into the original text using the Huffman Tree.
+
+## Contributing
+Contributions to this project are welcome! You can contribute by:
+- Improving the efficiency of the existing implementation.
+- Adding new features, such as file input/output handling to work with different file types.
+- Refactoring the code for clarity and maintainability.
+
+## License
+This project is open source and available under the [MIT License](LICENSE).
+
+Enjoy exploring and enhancing the Huffman coding project!
diff --git a/huffman.py/huffman_encoding/huffman_encode_decode.py b/huffman.py/huffman_encoding/huffman_encode_decode.py
new file mode 100644
index 00000000..112a1088
--- /dev/null
+++ b/huffman.py/huffman_encoding/huffman_encode_decode.py
@@ -0,0 +1,106 @@
+import heapq
+
+class HuffmanNode:
+    """Huffman tree node: leaves carry a character, merged nodes use char=None."""
+
+    def __init__(self, char, freq):
+        self.char, self.freq = char, freq
+        self.left = self.right = None  # children are attached later by the tree builder
+
+    def __lt__(self, other):
+        # heapq compares nodes with `<`, so the lower frequency is popped first
+        return self.freq < other.freq
+
+def build_frequency_map(text):
+    """Return a dict mapping each character of `text` to its occurrence count."""
+    from collections import Counter  # local import: the module itself only imports heapq
+
+    return dict(Counter(text))  # Counter keeps first-seen key order, like a manual loop
+
+def build_huffman_tree(frequency_map):
+    """Return the root of a Huffman tree for a {char: count} map (ValueError if empty)."""
+    if not frequency_map:
+        raise ValueError("cannot build a Huffman tree from an empty frequency map")
+    priority_queue = [HuffmanNode(char, freq) for char, freq in frequency_map.items()]
+    heapq.heapify(priority_queue)
+    if len(priority_queue) == 1:
+        # A bare leaf root would give the lone symbol the empty code ""; hang it on both branches.
+        root = HuffmanNode(None, priority_queue[0].freq)
+        root.left = root.right = priority_queue[0]
+        return root
+    while len(priority_queue) > 1:  # merge the two lowest-frequency nodes until one remains
+        left, right = heapq.heappop(priority_queue), heapq.heappop(priority_queue)
+        merged = HuffmanNode(None, left.freq + right.freq)
+        merged.left, merged.right = left, right
+        heapq.heappush(priority_queue, merged)
+    return priority_queue[0]
+
+def build_encoding_map(root, path="", encoding_map=None):
+    """Return {char: bit-string} for every leaf under `root`.
+
+    Going left appends "0" to `path`, going right appends "1". Codes are stored in
+    `encoding_map` when one is passed, otherwise in a new dict; that dict is returned."""
+    codes = {} if encoding_map is None else encoding_map
+    if root.char is None:
+        for bit, child in (("0", root.left), ("1", root.right)):
+            build_encoding_map(child, path + bit, codes)
+    else:
+        codes[root.char] = path
+    return codes
+
+def encode_text(text, encoding_map):
+    """Return the concatenated codes of every character in `text`.
+
+    Raises KeyError if a character has no entry in `encoding_map`."""
+    return "".join(encoding_map[char] for char in text)
+
+def decode_text(encoded_text, root):
+    """Decode a string of '0'/'1' characters by walking the tree from `root`.
+
+    '0' goes left and '1' goes right; each leaf reached emits its character and
+    restarts at `root` (trailing bits that stop short of a leaf are dropped).
+    Raises ValueError on any other character instead of reading it as '1'."""
+    decoded_chars = []  # joined once at the end; repeated `+=` on str is quadratic
+    current_node = root
+    for bit in encoded_text:
+        if bit not in ("0", "1"):
+            raise ValueError(f"invalid bit {bit!r} in encoded text")
+        current_node = current_node.left if bit == "0" else current_node.right
+        if current_node.char is not None:
+            decoded_chars.append(current_node.char)
+            current_node = root
+    return "".join(decoded_chars)
+
+def check_result(s1, s2):  # print whether the two values compare equal
+    print("Test case Failed!!!!!" if s1 != s2 else "Test case Passed")
+
+def read_file_to_string(filepath):
+    """Return the UTF-8 text of `filepath`; on failure print the reason and return None."""
+    try:
+        with open(filepath, 'r', encoding='utf-8') as source:
+            return source.read()
+    except FileNotFoundError:
+        problem = f"Error: The file at {filepath} does not exist."
+    except Exception as exc:
+        problem = f"An error occurred: {exc}"
+    print(problem)  # reached only after an exception; falls through to return None
+
+# Example usage (runs on import as well as on direct execution: no __main__ guard)
+text = "this is an example of how Huffman encoding works"
+
+# Count how often each character occurs in the sample text
+frequency_map = build_frequency_map(text)
+# Merge the two lowest-frequency nodes repeatedly until a single root remains
+huffman_tree = build_huffman_tree(frequency_map)
+# Map each character to its root-to-leaf path ("0" = left, "1" = right)
+encoding_map = build_encoding_map(huffman_tree)
+
+# Encode with the code table, then decode by walking the same tree
+encoded_text = encode_text(text, encoding_map)
+decoded_text = decode_text(encoded_text, huffman_tree)
+
+# Print all three strings, then whether the round trip reproduced the input
+print("Original:", text)
+print("Encoded:", encoded_text)
+print("Decoded:", decoded_text)
+check_result(text, decoded_text)