Skip to content

Commit 2ebfc69

Browse files
Add 'Top K Frequent Elements'
1 parent 76f5c08 commit 2ebfc69

File tree

3 files changed

+241
-0
lines changed

3 files changed

+241
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ A collection of LeetCode solutions
5050

5151
[Subtree of Another Tree](./src/subtree_of_another_tree.py)
5252

53+
[Top K Frequent Elements](./src/top_k_frequent_elements.py)
54+
5355
[Two Sum](./src/two_sum.py)
5456

5557
[Valid Anagram](./src/valid_anagram.py)

src/top_k_frequent_elements.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
"""
2+
347. Top K Frequent Elements
3+
4+
https://leetcode.com/problems/top-k-frequent-elements
5+
6+
NOTES
7+
* Use a heap or the quickselect algorithm.
8+
9+
Once you understand heaps (and the quickselect algorithm), this class of
10+
problems becomes trivial.
11+
12+
If you see *kth smallest*, *k closest*, or *top k* mentioned in a question, it
13+
typically means the problem can be solved using a heap. The general approach to
14+
solving these problems involves maintaining a max- or min-heap of size k. The
15+
kth smallest value is the root of the max-heap. The kth largest value is the
16+
root of the min-heap. To retrieve all k smallest (or largest) elements, simply
17+
return the sorted heap.
18+
19+
Though a heap offers O(n log k) time complexity, the quickselect algorithm
20+
solves this problem in linear time on average (O(n); worst case O(n^2)).
21+
"""
22+
23+
import heapq
24+
from collections import Counter
25+
26+
27+
class Solution:
    """
    This solution relies heavily on the Python Standard library, both for
    counting the frequency of elements (`Counter`) and finding the k most
    frequent elements (`heapq.nlargest`).

    To fully understand this solution, it's advisable to write out the full
    solution.
    """

    def topKFrequent(self, nums: list[int], k: int) -> list[int]:
        """
        Return the k most frequent values in `nums`, most frequent first.

        Always returns a new list of distinct values; the input list is never
        returned or modified. If `k` exceeds the number of distinct values,
        every distinct value is returned. A non-positive `k` yields `[]`.
        """
        # `Counter` provides a means for counting hashable items. Elements are
        # stored as keys and their counts are stored as values.
        count = Counter(nums)
        # `heapq.nlargest` returns the n (or k) largest elements in a dataset.
        # The 'key' parameter is a function for retrieving the element's
        # priority. Iterating a `Counter` yields its keys, so the result
        # contains each value at most once.
        return heapq.nlargest(k, count, key=count.get)
46+
47+
48+
class HeapSolution:
    """
    This solution still leverages the `heapq` module of the Python Standard
    library, but implements its own kth largest algorithm.

    This solution has O(n log k) time complexity (log(k) comparisons/swaps for
    n elements).
    """

    def topKFrequent(self, nums: list[int], k: int) -> list[int]:
        """
        Return the k most frequent values in `nums`, sorted ascending.

        Always returns a new list of distinct values. If `k` exceeds the
        number of distinct values, every distinct value is returned. A
        non-positive `k` yields `[]`.
        """
        # Nothing can be selected; this also keeps `heap[0]` below from being
        # evaluated on an empty heap.
        if k <= 0:
            return []

        # Build a hash table of integer frequencies.
        # The time complexity of this operation is O(n).
        count: dict[int, int] = {}
        for n in nums:
            count[n] = count.get(n, 0) + 1

        # The top k frequent elements (or kth largest) problem can be
        # efficiently solved using a min-heap. Maintaining a heap of size k,
        # the kth largest value is always the root of the min-heap. For all k
        # largest elements, simply return the heap.
        heap: list[tuple[int, int]] = []
        for key, val in count.items():
            # NOTE: Building a heap using k insertions is less performant
            # (O(k log k)), than building the heap using heapification (O(k)),
            # but simplifies the logic.
            if len(heap) < k:
                # NOTE: Python compares tuples element by element. Therefore,
                # the element frequency count is used to designate priority.
                heapq.heappush(heap, (val, key))
            elif val > heap[0][0]:
                # The new element is more frequent than the least frequent
                # element kept so far, so it takes that element's place.
                heapq.heapreplace(heap, (val, key))

        # Since the problem states, "You may return the answer in any order.",
        # we simply need to return the heap. For consistency, the result is
        # sorted anyway.
        return sorted(key for _, key in heap)
90+
91+
92+
class QuickselectSolution:
    """
    Return the top k frequent elements using the quickselect algorithm.

    Quickselect (also known as Hoare's selection algorithm) is a selection
    algorithm to find the kth smallest (or largest) element in an unordered
    list of n elements.

    Once quickselect has placed the element that belongs at index k - 1, every
    element before that index is at least as frequent as it. Thus allowing us
    to return the top k frequent elements in any order.

    This solution has O(n) average-case and O(n^2) worst-case time complexity.

    NOTE: Instead of finding the (n - k)th element, we simply reverse the
    comparison in the `partition()` function, since quickselect typically puts
    elements in ascending order.
    """

    def topKFrequent(self, nums: list[int], k: int) -> list[int]:
        """
        Return the k most frequent values in `nums`, sorted ascending.

        Always returns a new list of distinct values. If `k` exceeds the
        number of distinct values, every distinct value is returned. A
        non-positive `k` yields `[]`.
        """
        # Build a hash table of integer frequencies.
        # The time complexity of this operation is O(n).
        count: dict[int, int] = {}
        for n in nums:
            count[n] = count.get(n, 0) + 1

        # The quickselect algorithm modifies the list in-place. Therefore, we
        # create a list of unique keys, which serves as our list. The values
        # associated with these keys are used for comparisons.
        keys = list(count)

        # There are at most len(keys) distinct answers.
        k = min(k, len(keys))
        if k <= 0:
            return []

        # The kth most frequent element lives at 0-based index k - 1. Asking
        # for index k instead would step one past the boundary (and past the
        # end of the list when k equals the number of unique elements).
        self.quickselect(count, keys, 0, len(keys) - 1, k - 1)

        # Since the problem states, "You may return the answer in any order.",
        # we simply need to return the list up to k. For consistency, the
        # result is sorted anyway.
        return sorted(keys[:k])

    def quickselect(self, d: dict[int, int], l: list[int], left: int, right: int, k: int) -> int:
        """
        Return the kth element (0-based) in the given list.

        `l` is reordered in place so that, on return, l[k] holds the element
        that belongs at index k when ordered by descending frequency `d`, and
        no element before index k is less frequent than it.

        `k` is expected to satisfy left <= k <= right.
        """
        # Iterating (rather than recursing) keeps the stack depth constant;
        # with many tied frequencies the range shrinks by only one element
        # per partition, so the number of rounds can be large.
        while left < right:
            # Retrieve the index of the pivot by partitioning the list into
            # elements greater than or less than or equal to the pivot.
            pivot = self.partition(d, l, left, right)

            # If k is equal to 'pivot', then l[pivot] is the kth element in
            # the list. Otherwise, continue with the partition comprising
            # elements greater than or less than or equal to the pivot. This
            # partition is guaranteed to contain the kth element.
            if k == pivot:
                return l[k]
            if k < pivot:
                right = pivot - 1
            else:
                left = pivot + 1

        # A single element remains; it is the kth element.
        return l[left]

    def partition(self, d: dict[int, int], l: list[int], left: int, right: int) -> int:
        """
        Reorder the list such that elements greater than the pivot are before
        elements less than or equal to the pivot. When complete, the pivot is
        in its final sorted position. The pivot is chosen as the last element
        in the partition (Lomuto partition scheme).

        Elements are compared by their frequency in `d`. Returns the final
        index of the pivot.
        """
        # Choose the last element (right) as the pivot.
        pivot = l[right]
        pivot_count = d[pivot]

        # i (commonly referred to as the "store index") is used to denote the
        # index of the pivot. j is used for scanning the list from left to
        # right-1.
        i = left

        # The loop maintains the following invariant:
        #
        # Elements left through i-1 (inclusive) are > pivot
        # Elements i through j-1 (inclusive) are ≤ pivot
        for j in range(left, right):
            if d[l[j]] > pivot_count:
                l[i], l[j] = l[j], l[i]
                i += 1

        # As a final step, move pivot to its final position. This will be its
        # final position in the sorted array.
        l[i], l[right] = l[right], l[i]

        # Return the index of the pivot. The pivot index is used to determine
        # the new left and right bounds for quickselect.
        return i

tests/test_top_k_frequent_elements.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""
2+
347. Top K Frequent Elements
3+
4+
https://leetcode.com/problems/top-k-frequent-elements
5+
"""
6+
7+
from unittest import TestCase
8+
9+
from src.top_k_frequent_elements import HeapSolution, QuickselectSolution, Solution
10+
11+
12+
class TestSolution(TestCase):
    """
    Tests for `Solution.topKFrequent`.

    The problem allows the answer in any order, so results are compared
    without regard to order.
    """

    def test_1(self):
        exp = [1, 2]
        self.assertCountEqual(Solution().topKFrequent([1, 1, 1, 2, 2, 3], 2), exp)

    def test_2(self):
        exp = [1]
        self.assertCountEqual(Solution().topKFrequent([1], 1), exp)

    def test_3(self):
        exp = [1, 2, 3]
        self.assertCountEqual(Solution().topKFrequent([1, 1, 1, 2, 2, 2, 3, 3, 3], 3), exp)
24+
25+
26+
class TestHeapSolution(TestCase):
    """
    Tests for `HeapSolution.topKFrequent`.

    The problem allows the answer in any order, so results are compared
    without regard to order.
    """

    def test_1(self):
        exp = [1, 2]
        self.assertCountEqual(HeapSolution().topKFrequent([1, 1, 1, 2, 2, 3], 2), exp)

    def test_2(self):
        exp = [1]
        self.assertCountEqual(HeapSolution().topKFrequent([1], 1), exp)

    def test_3(self):
        exp = [1, 2, 3]
        self.assertCountEqual(HeapSolution().topKFrequent([1, 1, 1, 2, 2, 2, 3, 3, 3], 3), exp)
38+
39+
40+
class TestQuickselectSolution(TestCase):
    """
    Tests for `QuickselectSolution.topKFrequent`.

    The problem allows the answer in any order, so results are compared
    without regard to order.
    """

    def test_1(self):
        exp = [1, 2]
        self.assertCountEqual(QuickselectSolution().topKFrequent([1, 1, 1, 2, 2, 3], 2), exp)

    def test_2(self):
        exp = [1]
        self.assertCountEqual(QuickselectSolution().topKFrequent([1], 1), exp)

    def test_3(self):
        exp = [1, 2, 3]
        self.assertCountEqual(QuickselectSolution().topKFrequent([1, 1, 1, 2, 2, 2, 3, 3, 3], 3), exp)

    def test_k_equals_unique_count(self):
        # k equal to the number of distinct values, with the least frequent
        # value last, exercises the selection boundary.
        exp = [1, 2]
        self.assertCountEqual(QuickselectSolution().topKFrequent([1, 1, 2], 2), exp)

0 commit comments

Comments
 (0)