168 changes: 114 additions & 54 deletions chapter_01/p01_is_unique.py
@@ -1,71 +1,94 @@
import time
import unittest
from collections import defaultdict
from typing import Callable


def is_unique_chars_algorithmic(string):
# Assuming character set is ASCII (128 characters)
if len(string) > 128:
def is_unique_chars_algorithmic(string: str) -> bool:
"""
Determines if a string has all unique characters using a boolean array.
Assumes the character set is ASCII (128 characters).
Time complexity: O(N), where N is the length of the string (at most 128).
Space complexity: O(1) (boolean array size is fixed at 128).
"""
if len(string) > 128: # Based on ASCII assumption
return False

# this is a pythonic and faster way to initialize an array with a fixed value.
# careful though it won't work for a doubly nested array
char_set = [False] * 128
for char in string:
val = ord(char)
if char_set[val]:
char_set: list[bool] = [False] * 128 # Boolean array to track characters
for char_code in map(ord, string):
if char_code > 127:
# Character out of assumed ASCII range
return False # Or raise ValueError("Non-ASCII character found")
if char_set[char_code]:
# Char already found in string
return False
char_set[val] = True
char_set[char_code] = True

return True
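
For reference, a minimal usage sketch of the boolean-array approach above, using inputs drawn from the test cases further down (illustrative only, not part of the module):

assert is_unique_chars_algorithmic("abcd")
assert is_unique_chars_algorithmic("".join(chr(v) for v in range(128)))           # all 128 ASCII characters once
assert not is_unique_chars_algorithmic("".join(chr(v // 2) for v in range(129)))  # 129 characters cannot all be unique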


def is_unique_chars_pythonic(string):
def is_unique_chars_pythonic(string: str) -> bool:
"""
Determines if a string has all unique characters using Python's set properties.
Time complexity: O(N) on average, where N is the length of the string.
Space complexity: O(N) in the worst case for the set.
"""
return len(set(string)) == len(string)


def is_unique_bit_vector(string):
"""Uses bitwise operation instead of extra data structures."""
# Assuming character set is ASCII (128 characters)
if len(string) > 128:
def is_unique_bit_vector(string: str) -> bool:
"""
Determines if a string has all unique characters using a bit vector.
Assumes the character set is ASCII (128 characters), so the bit vector needs
at most 128 bits. Python integers have arbitrary precision, so a single int
covers the full 0-127 range; in languages with fixed-width integers the
character range would have to be restricted further (e.g. to 'a'-'z').
Time complexity: O(N).
Space complexity: O(1).
"""
# Assuming character set is ASCII (128 characters) for this specific check
if len(string) > 128: # More than 128 characters cannot all be unique under the ASCII assumption
return False

checker = 0
for c in string:
val = ord(c)
if (checker & (1 << val)) > 0:
checker: int = 0
for char_code in map(ord, string):
if char_code > 127:
# Character out of assumed ASCII range for this bit vector approach
return False # Or raise ValueError("Non-ASCII character found")
if (checker & (1 << char_code)) > 0:
return False
checker |= 1 << val
checker |= (1 << char_code)
return True
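
To make the bit-vector bookkeeping concrete, a small standalone sketch of how checker accumulates one bit per seen code point, under the same ASCII assumption (illustrative only, not part of the module):

checker = 0
for char_code in map(ord, "abca"):
    mask = 1 << char_code      # one bit per ASCII code point (here bits 97, 98, 99)
    if checker & mask:         # bit already set -> duplicate (the second 'a')
        print(f"duplicate: {chr(char_code)}")
        break
    checker |= mask            # mark this code point as seen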


def is_unique_chars_using_dictionary(string: str) -> bool:
character_counts = {}
"""
Determines if a string has all unique characters using a dictionary (hash map).
Time complexity: O(N) on average, where N is the length of the string.
Space complexity: O(K), where K is the number of unique characters (at most N).
"""
character_counts: dict[str, bool] = {} # Stores seen characters
for char in string:
if char in character_counts:
return False
character_counts[char] = 1
character_counts[char] = True # only key presence matters; the stored value is unused
return True


def is_unique_chars_using_set(string: str) -> bool:
characters_seen = set()
for char in string:
if char in characters_seen:
return False
characters_seen.add(char)
return True


# O(NlogN)
def is_unique_chars_sorting(string: str) -> bool:
"""
Determines if a string has all unique characters by sorting it first.
Time complexity: O(N log N) due to sorting.
Space complexity: O(N), since sorted() builds a new list of the characters.
"""
if not string:
return True
sorted_string = sorted(string)
last_character = None
for char in sorted_string:
if char == last_character:
last_character = sorted_string[0]
for i in range(1, len(sorted_string)):
if sorted_string[i] == last_character:
return False
last_character = char
last_character = sorted_string[i]
return True
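
The sorting-based check relies on duplicates becoming adjacent after sorting; a short illustration, the first input taken from the test cases below, the second a made-up duplicate example:

print(sorted("s4fad"))  # ['4', 'a', 'd', 'f', 's'] -> no equal neighbours, so all unique
print(sorted("aabcd"))  # ['a', 'a', 'b', 'c', 'd'] -> equal neighbours at the front, so not unique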


@@ -79,7 +102,9 @@ def is_unique_chars_sort(string: str) -> bool:


class Test(unittest.TestCase):
test_cases = [
"""Tests for the various is_unique_chars implementations."""

test_cases: list[tuple[str, bool]] = [
("abcd", True),
("s4fad", True),
("", True),
@@ -88,7 +113,7 @@ class Test(unittest.TestCase):
("".join([chr(val) for val in range(128)]), True), # unique 128 chars
("".join([chr(val // 2) for val in range(129)]), False), # non-unique 129 chars
]
test_functions = [
test_functions: list[Callable[[str], bool]] = [
is_unique_chars_pythonic,
is_unique_chars_algorithmic,
is_unique_bit_vector,
@@ -98,24 +123,59 @@
is_unique_chars_sort,
]

def test_is_unique_chars(self):
num_runs = 1000
function_runtimes = defaultdict(float)
def test_is_unique_chars(self) -> None:
"""
Runs all is_unique_chars implementations against general test cases.
Also performs a mini-benchmark and prints the runtimes.
"""
num_runs: int = 1000
# Using defaultdict to store sum of runtimes for each function
function_runtimes: defaultdict[str, float] = defaultdict(float)

for _ in range(num_runs):
for text, expected in self.test_cases:
for is_unique_chars in self.test_functions:
start = time.perf_counter()
assert (
is_unique_chars(text) == expected
), f"{is_unique_chars.__name__} failed for value: {text}"
function_runtimes[is_unique_chars.__name__] += (
time.perf_counter() - start
) * 1000

print(f"\n{num_runs} runs")
for function_name, runtime in function_runtimes.items():
print(f"{function_name}: {runtime:.1f}ms")
for text, expected_result in self.test_cases:
for is_unique_func in self.test_functions:
start_time: float = time.perf_counter()
actual_result = is_unique_func(text)
assert actual_result == expected_result, (
f"{is_unique_func.__name__}('{text}') returned {actual_result}, "
f"expected {expected_result}"
)
function_runtimes[is_unique_func.__name__] += (
time.perf_counter() - start_time
) * 1000 # Convert to milliseconds

print(f"\nBenchmark Results ({num_runs} runs per function):")
for function_name, total_runtime in function_runtimes.items():
# Calculate average runtime if needed, or print total as is
print(f"{function_name}: {total_runtime:.1f}ms (total)")

def test_non_ascii_handling(self) -> None:
"""
Tests how different implementations handle non-ASCII characters.
- _algorithmic and _bit_vector should return False due to ASCII constraint.
- Other methods should handle them as regular characters.
"""
non_ascii_test_cases = [
# (input_string, expected_for_ascii_limited_funcs, expected_for_unicode_funcs)
("abcä", False, True), # 'ä' is non-ASCII
("ü", False, True), # 'ü' is non-ASCII
("äöü", False, True), # All non-ASCII, but unique among themselves
("äbä", False, False), # Non-unique non-ASCII for Unicode funcs
("€uro", False, True), # Euro sign
]

for text, expected_ascii, expected_unicode in non_ascii_test_cases:
for func in self.test_functions:
is_ascii_limited = func.__name__ in (
"is_unique_chars_algorithmic",
"is_unique_bit_vector",
)
expected = expected_ascii if is_ascii_limited else expected_unicode
actual = func(text)
assert actual == expected, (
f"{func.__name__}('{text}') returned {actual}, expected {expected}"
)
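
The expectations in test_non_ascii_handling follow directly from the code points involved; a small sketch of the relevant ord values (illustrative only):

print(ord("z"))  # 122  -> inside the 0-127 ASCII range covered by the boolean array / bit vector
print(ord("ä"))  # 228  -> outside 0-127, so the ASCII-limited functions return False
print(ord("€"))  # 8364 -> far outside the ASCII range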


if __name__ == "__main__":
123 changes: 100 additions & 23 deletions chapter_01/p02_check_permutation.py
@@ -1,43 +1,72 @@
# O(N)
import unittest
from collections import Counter
from typing import Callable


def check_permutation_by_sort(s1, s2):
def check_permutation_by_sort(s1: str, s2: str) -> bool:
"""
Checks if s2 is a permutation of s1 by sorting both strings.
Time complexity: O(N log N) due to sorting, where N is the length of the strings.
Space complexity: O(N) for the sorted copies of the strings.
"""
if len(s1) != len(s2):
return False
s1, s2 = sorted(s1), sorted(s2)
for i in range(len(s1)):
if s1[i] != s2[i]:
s1_sorted = sorted(s1)
s2_sorted = sorted(s2)
# After sorting, if they are permutations, they must be identical.
for char1, char2 in zip(s1_sorted, s2_sorted):
if char1 != char2:
return False
return True
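
Because two equal-length strings are permutations exactly when their sorted forms are identical, the comparison loop above can also be collapsed into a single expression; a minimal equivalent sketch (a variant for illustration, not the function exercised by the tests):

def check_permutation_by_sort_oneliner(s1: str, s2: str) -> bool:
    # sorted() returns lists, which compare element-wise, so one comparison suffices.
    return len(s1) == len(s2) and sorted(s1) == sorted(s2)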


def check_permutation_by_count(str1, str2):
def check_permutation_by_count(str1: str, str2: str) -> bool:
"""
Checks if str2 is a permutation of str1 using character counts.
Assumes an 8-bit character set (e.g., ASCII, up to 256 distinct characters).
Time complexity: O(N), where N is the length of the strings.
Space complexity: O(1) (fixed-size array for counts).
"""
if len(str1) != len(str2):
return False
counter = [0] * 256
for c in str1:
counter[ord(c)] += 1
for c in str2:
if counter[ord(c)] == 0:
# Initialize counts for 256 possible characters (e.g., extended ASCII)
char_counts: list[int] = [0] * 256

for char_code in map(ord, str1):
if char_code >= 256:
# Character out of assumed 8-bit range
return False # Or raise ValueError("Non-8bit character found")
char_counts[char_code] += 1
for char_code in map(ord, str2):
if char_code >= 256:
# Character out of assumed 8-bit range
return False # Or raise ValueError("Non-8bit character found")
if char_counts[char_code] == 0: # char in str2 missing from str1, or str2 has more of it than str1
return False
counter[ord(c)] -= 1
char_counts[char_code] -= 1
# All counts should be zero if they are permutations
return True
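
The same counting idea works beyond the 8-bit assumption if the fixed 256-slot list is replaced by a dictionary keyed on the character itself; a hedged sketch of that variant (check_permutation_by_count_unicode is a hypothetical name, not part of this module):

def check_permutation_by_count_unicode(str1: str, str2: str) -> bool:
    if len(str1) != len(str2):
        return False
    counts: dict[str, int] = {}
    for char in str1:
        counts[char] = counts.get(char, 0) + 1
    for char in str2:
        if counts.get(char, 0) == 0:  # char missing, or str2 has more of it than str1
            return False
        counts[char] -= 1
    return True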


def check_permutation_pythonic(str1, str2):
# short-circuit to avoid instantiating a Counter which for big strings
# may be an expensive operation
def check_permutation_pythonic(str1: str, str2: str) -> bool:
"""
Checks if str2 is a permutation of str1 using collections.Counter.
Time complexity: O(N), where N is the length of the strings.
Space complexity: O(K), where K is the number of unique characters.
"""
if len(str1) != len(str2):
return False

# Counter creates a hash map of character counts.
# Two strings are permutations if their character counts are identical.
return Counter(str1) == Counter(str2)
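
For reference, how the Counter comparison behaves on two of the test cases below (illustrative only):

from collections import Counter

print(Counter("dog") == Counter("god"))    # True  -> same character counts, so permutations
print(Counter("aaab") == Counter("bbba"))  # False -> counts differ (three a's vs one)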


class Test(unittest.TestCase):
# str1, str2, is_permutation
test_cases = (
"""Tests for the different check_permutation implementations."""

# Test cases: (string1, string2, expected_is_permutation)
test_cases: tuple[tuple[str, str, bool], ...] = (
("dog", "god", True),
("abcd", "bacd", True),
("3563476", "7334566", True),
@@ -51,17 +80,65 @@ class Test(unittest.TestCase):
("aaab", "bbba", False),
)

testable_functions = [
testable_functions: list[Callable[[str, str], bool]] = [
check_permutation_by_sort,
check_permutation_by_count,
check_permutation_pythonic,
]

def test_cp(self):
# true check
for check_permutation in self.testable_functions:
for str1, str2, expected in self.test_cases:
assert check_permutation(str1, str2) == expected
def test_check_permutation(self) -> None:
"""Runs all check_permutation functions against the defined general test cases."""
for perm_function in self.testable_functions:
for s1_test, s2_test, expected_result in self.test_cases:
actual_result = perm_function(s1_test, s2_test)
assert actual_result == expected_result, (
f"{perm_function.__name__}('{s1_test}', '{s2_test}') returned {actual_result}, "
f"expected {expected_result}"
)

def test_extended_character_handling(self) -> None:
"""
Tests how different implementations handle characters beyond typical ASCII (0-255).
- check_permutation_by_count should return False due to its 8-bit assumption.
- Other methods should handle them as regular Unicode characters.
"""
char_gt_255 = chr(256) # Example: 'Ā' Latin A with Macron

# Test cases: (s1, s2, expected_for_8bit_limited_func, expected_for_unicode_funcs)
extended_char_test_cases = [
("ab" + char_gt_255, char_gt_255 + "ba", False, True),
(char_gt_255, char_gt_255, False, True),
("a" + char_gt_255 + "a", "aa" + char_gt_255, False, True),
("a" + char_gt_255, "b" + char_gt_255, False, False), # Not permutations
# Mixed case: one string has an extended character, the other does not.
# Lengths still match, so this exercises per-character processing rather than the length check.
("abc", "ab" + char_gt_255, False, False),
]

# A case where only one string has an extended character but the lengths still
# match (e.g. s1="Ā", s2="a") is covered implicitly: check_permutation_by_count
# returns False because of the >= 256 code point, while the Unicode-safe
# functions return False because the strings are simply not permutations.

for s1, s2, expected_8bit, expected_unicode in extended_char_test_cases:
for func in self.testable_functions:
is_8bit_limited = func.__name__ == "check_permutation_by_count"

expected = expected_8bit if is_8bit_limited else expected_unicode

# Note: for ("abc", "ab" + char_gt_255) both expectations are False:
# check_permutation_by_count rejects the >= 256 code point, and the
# Unicode-safe functions correctly report that the strings are not permutations.

actual = func(s1, s2)
assert actual == expected, (
f"{func.__name__}('{s1}', '{s2}') returned {actual}, expected {expected}"
)


if __name__ == "__main__":