-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop_k_frequent_elements.py
188 lines (153 loc) · 7.18 KB
/
top_k_frequent_elements.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""
347. Top K Frequent Elements
https://leetcode.com/problems/top-k-frequent-elements
NOTES
* Use a heap or the quickselect algorithm.
Once you understand heaps (and the quickselect algorithm), this class of
problems becomes trivial.
If you see *kth smallest*, *k closest*, or *top k* mentioned in a question, it
typically means the problem can be solved using a heap. The general approach to
solving these problems involves maintaining a max- or min-heap of size k. The
kth smallest value is the root of the max-heap. The kth largest value is the
root of the min-heap. To retrieve all k smallest (or largest) elements, simply
return the sorted heap.
Though a heap offers O(nlog k) time complexity, the quickselect algorithm
solves this problem in linear time (O(n)).
"""
import heapq
from collections import Counter
class Solution:
"""
This solution relies heavily on the Python Standard library, both for
counting the frequency of elements (`Counter`) and finding the kth most
frequent elements (`heapq.nlargest`).
To fully understand this solution, its advisable to write out the full
solution.
"""
def topKFrequent(self, nums: list[int], k: int) -> list[int]:
if k == len(nums):
return nums
# `Counter` provides a means for counting hashable items. Elements are
# stored as keys and their counts are stored as values.
count = Counter(nums)
# `heapq.nlargest` returns the n (or k) largest elements in a dataset.
# The 'key' parameter is function for retrieving the elements priority.
return heapq.nlargest(n=k, iterable=count.keys(), key=count.get)
class HeapSolution:
"""
This solution still leverages the `heapq` module of the Python Standard
library, but implements its own kth largest algorithm.
This solution has O(nlog k) time complexity (log(k) comparisons/swaps for n
elements)).
"""
def topKFrequent(self, nums: list[int], k: int) -> list[int]:
if k == len(nums):
return nums
# Build a hash table of integer frequencies.
# The time complexity of this operation is O(n).
count: dict[int, int] = {}
for n in nums:
if n in count:
count[n] += 1
else:
count[n] = 1
# The top k frequent elements (or kth largest) problem can be
# efficiently solved using a min-heap. Maintaining a heap of size k,
# the kth largest value is always the root of the min-heap. For all k
# largest elements, simply return the heap.
heap: list[tuple[int, int]] = []
for i, (key, val) in enumerate(count.items()):
# NOTE: Building a heap using k insertions is less performant
# (O(klog k)), than building the heap using heapification (O(k)),
# but simplifies the logic.
if i < k:
# NOTE: Python compares tuples element by element. Therefore,
# the element frequency count is used to designate priority.
heapq.heappush(heap, (val, key))
elif val > heap[0][0]:
heapq.heapreplace(heap, (val, key))
# Since the problem states, "You may return the answer in any order.",
# we simply need to return the heap. For consistency, the heap is
# sorted anyway.
return sorted([k for _, k in heap])
class QuickselectSolution:
"""
Return the top k frequent elements using the quickselect algorithm.
Quickselect (also known as Hoare's selection algorithm) is a selection
algorithm to find the kth smallest (or largest) element in an unordered
list of n elements.
Since quickselect returns the kth element in the list, elements less than k
are guaranteed to be less than (or greater than) k. Thus allowing us to
return the top k frequent elements in any order.
This solution has O(n) average-case and O(n^2) worst-case time complexity.
NOTE: Instead of finding the (n - k)th element, we simply reverse the
comparison in the `partion()` function, since quickselect typically puts
elements in ascending order.
"""
def topKFrequent(self, nums: list[int], k: int) -> list[int]:
if k == len(nums):
return nums
# Build a hash table of integer frequencies.
# The time complexity of this operation is O(n).
count: dict[int, int] = {}
for n in nums:
if n in count:
count[n] += 1
else:
count[n] = 1
# The quickselect algorithm modifies the list in-place. Therefore, we
# create a list of unique keys, which serves as our list. The values
# associated with these keys are used for comparisons.
l = list(count.keys())
self.quickselect(count, l, 0, len(l) - 1, k)
# Since the problem states, "You may return the answer in any order.",
# we simply need to return the list up to k. For consistency, the heap
# is sorted anyway.
return sorted(l[:k])
def quickselect(self, d: dict[int, int], l: list[int], left: int, right: int, k: int) -> int:
"""
Return the kth element (0-based) in the given list.
"""
if left == right:
return l[left]
# Retrieve the index of the pivot by partitioning the list into
# elements greater than or less than or equal to the pivot.
pivot = self.partition(d, l, left, right)
# If k is equal to 'pivot', then l[pivot] is the kth element in the
# list. Otherwise, execute quickselect on the partition comprising
# elements greater than or less than or equal to the pivot. This
# partition is guaranteed to contain the kth element.
if k == pivot:
return l[k]
elif k < pivot:
return self.quickselect(d, l, left, pivot - 1, k)
else:
return self.quickselect(d, l, pivot + 1, right, k)
def partition(self, d: dict[int, int], l: list[int], left: int, right: int) -> int:
"""
Reorder the list such that elements greater than the pivot are before
elements less than or equal to the pivot. When complete, the pivot is
in its final sorted position. The pivot is chosen as the last element
in the parition (Lomuto partition scheme).
"""
# Choose the last element (right) as the pivot.
pivot = l[right]
# i (commonly referred to as the "store index") is used to denote the
# index of the pivot. j is used for scanning the list from left to
# right-1.
i, j = left, left
# The loop maintains the following invariant:
#
# Elements left through i-1 (inclusive) are > pivot
# Elements i through j (inclusive) are ≤ pivot
while j < right:
if d[l[j]] > d[pivot]:
l[i], l[j] = l[j], l[i]
i += 1
j += 1
# As a final step, move pivot to its final position. This will be its
# final position in the sorted array.
l[i], l[right] = l[right], l[i]
# Return the index of the pivot. The pivot index is used to determine
# the new left and right arguments for quickselect.
return i