168 changes: 114 additions & 54 deletions chapter_01/p01_is_unique.py
@@ -1,71 +1,94 @@
import time
import unittest
from collections import defaultdict
from typing import Callable


def is_unique_chars_algorithmic(string):
# Assuming character set is ASCII (128 characters)
if len(string) > 128:
def is_unique_chars_algorithmic(string: str) -> bool:
"""
Determines if a string has all unique characters using a boolean array.
Assumes the character set is ASCII (128 characters).
Time complexity: O(N), where N is the length of the string (at most 128).
Space complexity: O(1) (boolean array size is fixed at 128).
"""
if len(string) > 128: # Based on ASCII assumption
return False

# this is a pythonic and faster way to initialize an array with a fixed value.
# careful though it won't work for a doubly nested array
char_set = [False] * 128
for char in string:
val = ord(char)
if char_set[val]:
char_set: list[bool] = [False] * 128 # Boolean array to track characters
for char_code in map(ord, string):
if char_code > 127:
# Character out of assumed ASCII range
return False # Or raise ValueError("Non-ASCII character found")
if char_set[char_code]:
# Char already found in string
return False
char_set[val] = True
char_set[char_code] = True

return True
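
For reference, a minimal usage sketch of the boolean-array approach above, using inputs drawn from the test cases further down (illustrative only, not part of the module):

assert is_unique_chars_algorithmic("abcd")
assert is_unique_chars_algorithmic("".join(chr(v) for v in range(128)))           # all 128 ASCII characters once
assert not is_unique_chars_algorithmic("".join(chr(v // 2) for v in range(129)))  # 129 characters cannot all be unique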


def is_unique_chars_pythonic(string):
def is_unique_chars_pythonic(string: str) -> bool:
"""
Determines if a string has all unique characters using Python's set properties.
Time complexity: O(N) on average, where N is the length of the string.
Space complexity: O(N) in the worst case for the set.
"""
return len(set(string)) == len(string)


def is_unique_bit_vector(string):
"""Uses bitwise operation instead of extra data structures."""
# Assuming character set is ASCII (128 characters)
if len(string) > 128:
def is_unique_bit_vector(string: str) -> bool:
"""
Determines if a string has all unique characters using a bit vector.
Assumes the character set is ASCII (128 characters), so the bit vector needs
at most 128 bits. Python integers have arbitrary precision, so a single int
covers the full 0-127 range; in languages with fixed-width integers the
character range would have to be restricted further (e.g. to 'a'-'z').
Time complexity: O(N).
Space complexity: O(1).
"""
# Assuming character set is ASCII (128 characters) for this specific check
if len(string) > 128: # More than 128 characters cannot all be unique under the ASCII assumption
return False

checker = 0
for c in string:
val = ord(c)
if (checker & (1 << val)) > 0:
checker: int = 0
for char_code in map(ord, string):
if char_code > 127:
# Character out of assumed ASCII range for this bit vector approach
return False # Or raise ValueError("Non-ASCII character found")
if (checker & (1 << char_code)) > 0:
return False
checker |= 1 << val
checker |= (1 << char_code)
return True
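
To make the bit-vector bookkeeping concrete, a small standalone sketch of how checker accumulates one bit per seen code point, under the same ASCII assumption (illustrative only, not part of the module):

checker = 0
for char_code in map(ord, "abca"):
    mask = 1 << char_code      # one bit per ASCII code point (here bits 97, 98, 99)
    if checker & mask:         # bit already set -> duplicate (the second 'a')
        print(f"duplicate: {chr(char_code)}")
        break
    checker |= mask            # mark this code point as seen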


def is_unique_chars_using_dictionary(string: str) -> bool:
character_counts = {}
"""
Determines if a string has all unique characters using a dictionary (hash map).
Time complexity: O(N) on average, where N is the length of the string.
Space complexity: O(K), where K is the number of unique characters (at most N).
"""
character_counts: dict[str, bool] = {} # Stores seen characters
for char in string:
if char in character_counts:
return False
character_counts[char] = 1
character_counts[char] = True # only key presence matters; the stored value is unused
return True


def is_unique_chars_using_set(string: str) -> bool:
characters_seen = set()
for char in string:
if char in characters_seen:
return False
characters_seen.add(char)
return True


# O(NlogN)
def is_unique_chars_sorting(string: str) -> bool:
"""
Determines if a string has all unique characters by sorting it first.
Time complexity: O(N log N) due to sorting.
Space complexity: O(N), since sorted() builds a new list of the characters.
"""
if not string:
return True
sorted_string = sorted(string)
last_character = None
for char in sorted_string:
if char == last_character:
last_character = sorted_string[0]
for i in range(1, len(sorted_string)):
if sorted_string[i] == last_character:
return False
last_character = char
last_character = sorted_string[i]
return True
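
The sorting-based check relies on duplicates becoming adjacent after sorting; a short illustration, the first input taken from the test cases below, the second a made-up duplicate example:

print(sorted("s4fad"))  # ['4', 'a', 'd', 'f', 's'] -> no equal neighbours, so all unique
print(sorted("aabcd"))  # ['a', 'a', 'b', 'c', 'd'] -> equal neighbours at the front, so not unique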


@@ -79,7 +102,9 @@ def is_unique_chars_sort(string: str) -> bool:


class Test(unittest.TestCase):
test_cases = [
"""Tests for the various is_unique_chars implementations."""

test_cases: list[tuple[str, bool]] = [
("abcd", True),
("s4fad", True),
("", True),
@@ -88,7 +113,7 @@ class Test(unittest.TestCase):
("".join([chr(val) for val in range(128)]), True), # unique 128 chars
("".join([chr(val // 2) for val in range(129)]), False), # non-unique 129 chars
]
test_functions = [
test_functions: list[Callable[[str], bool]] = [
is_unique_chars_pythonic,
is_unique_chars_algorithmic,
is_unique_bit_vector,
@@ -98,24 +123,59 @@
is_unique_chars_sort,
]

def test_is_unique_chars(self):
num_runs = 1000
function_runtimes = defaultdict(float)
def test_is_unique_chars(self) -> None:
"""
Runs all is_unique_chars implementations against general test cases.
Also performs a mini-benchmark and prints the runtimes.
"""
num_runs: int = 1000
# Using defaultdict to store sum of runtimes for each function
function_runtimes: defaultdict[str, float] = defaultdict(float)

for _ in range(num_runs):
for text, expected in self.test_cases:
for is_unique_chars in self.test_functions:
start = time.perf_counter()
assert (
is_unique_chars(text) == expected
), f"{is_unique_chars.__name__} failed for value: {text}"
function_runtimes[is_unique_chars.__name__] += (
time.perf_counter() - start
) * 1000

print(f"\n{num_runs} runs")
for function_name, runtime in function_runtimes.items():
print(f"{function_name}: {runtime:.1f}ms")
for text, expected_result in self.test_cases:
for is_unique_func in self.test_functions:
start_time: float = time.perf_counter()
actual_result = is_unique_func(text)
assert actual_result == expected_result, (
f"{is_unique_func.__name__}('{text}') returned {actual_result}, "
f"expected {expected_result}"
)
function_runtimes[is_unique_func.__name__] += (
time.perf_counter() - start_time
) * 1000 # Convert to milliseconds

print(f"\nBenchmark Results ({num_runs} runs per function):")
for function_name, total_runtime in function_runtimes.items():
# Calculate average runtime if needed, or print total as is
print(f"{function_name}: {total_runtime:.1f}ms (total)")

def test_non_ascii_handling(self) -> None:
"""
Tests how different implementations handle non-ASCII characters.
- _algorithmic and _bit_vector should return False due to ASCII constraint.
- Other methods should handle them as regular characters.
"""
non_ascii_test_cases = [
# (input_string, expected_for_ascii_limited_funcs, expected_for_unicode_funcs)
("abcä", False, True), # 'ä' is non-ASCII
("ü", False, True), # 'ü' is non-ASCII
("äöü", False, True), # All non-ASCII, but unique among themselves
("äbä", False, False), # Non-unique non-ASCII for Unicode funcs
("€uro", False, True), # Euro sign
]

for text, expected_ascii, expected_unicode in non_ascii_test_cases:
for func in self.test_functions:
is_ascii_limited = func.__name__ in (
"is_unique_chars_algorithmic",
"is_unique_bit_vector",
)
expected = expected_ascii if is_ascii_limited else expected_unicode
actual = func(text)
assert actual == expected, (
f"{func.__name__}('{text}') returned {actual}, expected {expected}"
)
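
The expectations in test_non_ascii_handling follow directly from the code points involved; a small sketch of the relevant ord values (illustrative only):

print(ord("z"))  # 122  -> inside the 0-127 ASCII range covered by the boolean array / bit vector
print(ord("ä"))  # 228  -> outside 0-127, so the ASCII-limited functions return False
print(ord("€"))  # 8364 -> far outside the ASCII range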


if __name__ == "__main__":
123 changes: 100 additions & 23 deletions chapter_01/p02_check_permutation.py
@@ -1,43 +1,72 @@
# O(N)
import unittest
from collections import Counter
from typing import Callable


def check_permutation_by_sort(s1, s2):
def check_permutation_by_sort(s1: str, s2: str) -> bool:
"""
Checks if s2 is a permutation of s1 by sorting both strings.
Time complexity: O(N log N) due to sorting, where N is the length of the strings.
Space complexity: O(N) for the sorted copies of the strings.
"""
if len(s1) != len(s2):
return False
s1, s2 = sorted(s1), sorted(s2)
for i in range(len(s1)):
if s1[i] != s2[i]:
s1_sorted = sorted(s1)
s2_sorted = sorted(s2)
# After sorting, if they are permutations, they must be identical.
for char1, char2 in zip(s1_sorted, s2_sorted):
if char1 != char2:
return False
return True
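
Because two equal-length strings are permutations exactly when their sorted forms are identical, the comparison loop above can also be collapsed into a single expression; a minimal equivalent sketch (a variant for illustration, not the function exercised by the tests):

def check_permutation_by_sort_oneliner(s1: str, s2: str) -> bool:
    # sorted() returns lists, which compare element-wise, so one comparison suffices.
    return len(s1) == len(s2) and sorted(s1) == sorted(s2)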


def check_permutation_by_count(str1, str2):
def check_permutation_by_count(str1: str, str2: str) -> bool:
"""
Checks if str2 is a permutation of str1 using character counts.
Assumes an 8-bit character set (e.g., ASCII, up to 256 distinct characters).
Time complexity: O(N), where N is the length of the strings.
Space complexity: O(1) (fixed-size array for counts).
"""
if len(str1) != len(str2):
return False
counter = [0] * 256
for c in str1:
counter[ord(c)] += 1
for c in str2:
if counter[ord(c)] == 0:
# Initialize counts for 256 possible characters (e.g., extended ASCII)
char_counts: list[int] = [0] * 256

for char_code in map(ord, str1):
if char_code >= 256:
# Character out of assumed 8-bit range
return False # Or raise ValueError("Non-8bit character found")
char_counts[char_code] += 1
for char_code in map(ord, str2):
if char_code >= 256:
# Character out of assumed 8-bit range
return False # Or raise ValueError("Non-8bit character found")
if char_counts[char_code] == 0: # char in str2 missing from str1, or str2 has more of it than str1
return False
counter[ord(c)] -= 1
char_counts[char_code] -= 1
# All counts should be zero if they are permutations
return True
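
The same counting idea works beyond the 8-bit assumption if the fixed 256-slot list is replaced by a dictionary keyed on the character itself; a hedged sketch of that variant (check_permutation_by_count_unicode is a hypothetical name, not part of this module):

def check_permutation_by_count_unicode(str1: str, str2: str) -> bool:
    if len(str1) != len(str2):
        return False
    counts: dict[str, int] = {}
    for char in str1:
        counts[char] = counts.get(char, 0) + 1
    for char in str2:
        if counts.get(char, 0) == 0:  # char missing, or str2 has more of it than str1
            return False
        counts[char] -= 1
    return True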


def check_permutation_pythonic(str1, str2):
# short-circuit to avoid instantiating a Counter which for big strings
# may be an expensive operation
def check_permutation_pythonic(str1: str, str2: str) -> bool:
"""
Checks if str2 is a permutation of str1 using collections.Counter.
Time complexity: O(N), where N is the length of the strings.
Space complexity: O(K), where K is the number of unique characters.
"""
if len(str1) != len(str2):
return False

# Counter creates a hash map of character counts.
# Two strings are permutations if their character counts are identical.
return Counter(str1) == Counter(str2)
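
For reference, how the Counter comparison behaves on two of the test cases below (illustrative only):

from collections import Counter

print(Counter("dog") == Counter("god"))    # True  -> same character counts, so permutations
print(Counter("aaab") == Counter("bbba"))  # False -> counts differ (three a's vs one)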


class Test(unittest.TestCase):
# str1, str2, is_permutation
test_cases = (
"""Tests for the different check_permutation implementations."""

# Test cases: (string1, string2, expected_is_permutation)
test_cases: tuple[tuple[str, str, bool], ...] = (
("dog", "god", True),
("abcd", "bacd", True),
("3563476", "7334566", True),
@@ -51,17 +80,65 @@ class Test(unittest.TestCase):
("aaab", "bbba", False),
)

testable_functions = [
testable_functions: list[Callable[[str, str], bool]] = [
check_permutation_by_sort,
check_permutation_by_count,
check_permutation_pythonic,
]

def test_cp(self):
# true check
for check_permutation in self.testable_functions:
for str1, str2, expected in self.test_cases:
assert check_permutation(str1, str2) == expected
def test_check_permutation(self) -> None:
"""Runs all check_permutation functions against the defined general test cases."""
for perm_function in self.testable_functions:
for s1_test, s2_test, expected_result in self.test_cases:
actual_result = perm_function(s1_test, s2_test)
assert actual_result == expected_result, (
f"{perm_function.__name__}('{s1_test}', '{s2_test}') returned {actual_result}, "
f"expected {expected_result}"
)

def test_extended_character_handling(self) -> None:
"""
Tests how different implementations handle characters beyond typical ASCII (0-255).
- check_permutation_by_count should return False due to its 8-bit assumption.
- Other methods should handle them as regular Unicode characters.
"""
char_gt_255 = chr(256) # Example: 'Ā' Latin A with Macron

# Test cases: (s1, s2, expected_for_8bit_limited_func, expected_for_unicode_funcs)
extended_char_test_cases = [
("ab" + char_gt_255, char_gt_255 + "ba", False, True),
(char_gt_255, char_gt_255, False, True),
("a" + char_gt_255 + "a", "aa" + char_gt_255, False, True),
("a" + char_gt_255, "b" + char_gt_255, False, False), # Not permutations
# Mixed case: one string has an extended character, the other does not.
# Lengths still match, so this exercises per-character processing rather than the length check.
("abc", "ab" + char_gt_255, False, False),
]

# A case where only one string has an extended character but the lengths still
# match (e.g. s1="Ā", s2="a") is covered implicitly: check_permutation_by_count
# returns False because of the >= 256 code point, while the Unicode-safe
# functions return False because the strings are simply not permutations.

for s1, s2, expected_8bit, expected_unicode in extended_char_test_cases:
for func in self.testable_functions:
is_8bit_limited = func.__name__ == "check_permutation_by_count"

expected = expected_8bit if is_8bit_limited else expected_unicode

# Note: for ("abc", "ab" + char_gt_255) both expectations are False:
# check_permutation_by_count rejects the >= 256 code point, and the
# Unicode-safe functions correctly report that the strings are not permutations.

actual = func(s1, s2)
assert actual == expected, (
f"{func.__name__}('{s1}', '{s2}') returned {actual}, expected {expected}"
)


if __name__ == "__main__":