|
| 1 | +""" |
| 2 | +347. Top K Frequent Elements |
| 3 | +
|
| 4 | +https://leetcode.com/problems/top-k-frequent-elements |
| 5 | +
|
| 6 | +NOTES |
| 7 | + * Use a heap or the quickselect algorithm. |
| 8 | +
|
| 9 | +Once you understand heaps (and the quickselect algorithm), this class of |
| 10 | +problems becomes trivial. |
| 11 | +
|
| 12 | +If you see *kth smallest*, *k closest*, or *top k* mentioned in a question, it |
| 13 | +typically means the problem can be solved using a heap. The general approach to |
| 14 | +solving these problems involves maintaining a max- or min-heap of size k. The |
| 15 | +kth smallest value is the root of the max-heap. The kth largest value is the |
| 16 | +root of the min-heap. To retrieve all k smallest (or largest) elements, simply |
| 17 | +return the sorted heap. |
| 18 | +
|
| 19 | +Though a heap offers O(n log k) time complexity, the quickselect algorithm |
| 20 | +solves this problem in linear time (O(n)) on average; its worst case is O(n^2). |
| 21 | +""" |
| 22 | + |
| 23 | +import heapq |
| 24 | +from collections import Counter |
| 25 | + |
| 26 | + |
class Solution:
    """
    Solve the problem with the Python Standard Library.

    `Counter` tallies how often each element occurs and `heapq.nlargest`
    selects the k keys with the highest counts.

    To fully understand this solution, it's advisable to write out the full
    algorithm by hand at least once.
    """

    def topKFrequent(self, nums: list[int], k: int) -> list[int]:
        """
        Return the k most frequent elements of `nums`.

        The result is always a new list of distinct values ordered from most
        to least frequent. If `k` exceeds the number of distinct values, every
        distinct value is returned; if `k` is zero or negative the result is
        empty.
        """
        # NOTE: There is deliberately no `k == len(nums)` shortcut returning
        # `nums`. That would hand the caller an alias of its own input (so
        # mutating the result mutates the input) and would leak duplicates
        # whenever `nums` repeats a value.

        # `Counter` provides a means for counting hashable items. Elements are
        # stored as keys and their counts are stored as values.
        count = Counter(nums)
        # `heapq.nlargest` returns the n (here, k) largest elements of an
        # iterable. The `key` parameter is a function that returns each
        # element's priority; iterating a `Counter` yields its keys.
        return heapq.nlargest(k, count, key=count.get)
| 46 | + |
| 47 | + |
class HeapSolution:
    """
    Find the top k frequent elements with a hand-rolled bounded min-heap.

    This solution still leverages the `heapq` module of the Python Standard
    Library for the heap primitives, but implements its own kth-largest
    selection.

    With u distinct values, counting costs O(n) and the selection costs
    O(u log k): each distinct value triggers at most one push or replace on a
    heap holding at most k entries.
    """

    def topKFrequent(self, nums: list[int], k: int) -> list[int]:
        """
        Return the k most frequent elements of `nums`, sorted ascending.

        The result is always a new list of distinct values. If `k` exceeds
        the number of distinct values, every distinct value is returned; if
        `k` is zero or negative the result is empty.
        """
        # Nothing can be selected. Without this guard the loop below would
        # peek at `heap[0]` on an empty heap and raise IndexError.
        if k <= 0:
            return []

        # Build a hash table of integer frequencies.
        # The time complexity of this operation is O(n).
        count: dict[int, int] = {}
        for n in nums:
            count[n] = count.get(n, 0) + 1

        # The top k frequent elements (or kth largest) problem can be
        # efficiently solved using a min-heap. Maintaining a heap of size k,
        # the kth largest value is always the root of the min-heap, so any
        # candidate that beats the root evicts it.
        heap: list[tuple[int, int]] = []
        for i, (key, val) in enumerate(count.items()):
            # NOTE: Building a heap using k insertions is less performant
            # (O(k log k)) than building it using heapification (O(k)), but
            # simplifies the logic.
            if i < k:
                # NOTE: Python compares tuples element by element. Therefore,
                # the frequency goes first so that it designates priority.
                heapq.heappush(heap, (val, key))
            elif val > heap[0][0]:
                heapq.heapreplace(heap, (val, key))

        # The problem allows any order; the values are sorted anyway so the
        # output is deterministic.
        return sorted(key for _, key in heap)
| 90 | + |
| 91 | + |
class QuickselectSolution:
    """
    Return the top k frequent elements using the quickselect algorithm.

    Quickselect (also known as Hoare's selection algorithm) is a selection
    algorithm to find the kth smallest (or largest) element in an unordered
    list of n elements.

    Once quickselect has placed the element with 0-based rank k - 1 at index
    k - 1, every element before it is at least as frequent and every element
    after it is at most as frequent. The first k entries are therefore the
    top k frequent elements, in no particular order.

    This solution has O(n) average-case and O(n^2) worst-case time complexity.

    NOTE: Instead of finding the (n - k)th element, the comparison in
    `partition()` is reversed so that the list ends up in descending order of
    frequency (quickselect typically puts elements in ascending order).
    """

    def topKFrequent(self, nums: list[int], k: int) -> list[int]:
        """
        Return the k most frequent elements of `nums`, sorted ascending.

        The result is always a new list of distinct values. If `k` exceeds
        the number of distinct values, every distinct value is returned; if
        `k` is zero or negative the result is empty.
        """
        if k <= 0:
            return []

        # Build a hash table of integer frequencies.
        # The time complexity of this operation is O(n).
        count: dict[int, int] = {}
        for n in nums:
            count[n] = count.get(n, 0) + 1

        # The quickselect algorithm modifies the list in-place. Therefore, we
        # create a list of unique keys, which serves as our list. The values
        # associated with these keys are used for comparisons.
        keys = list(count)

        # Selection is only needed when some keys must be left out. The kth
        # most frequent key belongs at 0-based index k - 1 (not k): once it is
        # in place, keys[:k] holds the answer.
        if k < len(keys):
            self.quickselect(count, keys, 0, len(keys) - 1, k - 1)

        # The problem allows any order; the values are sorted anyway so the
        # output is deterministic.
        return sorted(keys[:k])

    def quickselect(self, d: dict[int, int], l: list[int], left: int, right: int, k: int) -> int:
        """
        Return the element that belongs at 0-based index k of `l`.

        `l[left:right + 1]` is reordered in place, by descending `d[...]`
        frequency, just enough to put that element at index k.

        Raises IndexError if k is not within [left, right].
        """
        if not left <= k <= right:
            raise IndexError("k must lie within [left, right]")

        # Iterative on purpose: with many tied frequencies the search window
        # shrinks by a single element per step, which would exhaust Python's
        # recursion limit in a recursive formulation.
        while left < right:
            # Retrieve the index of the pivot by partitioning the window into
            # elements greater than, and less than or equal to, the pivot.
            pivot = self.partition(d, l, left, right)

            # If k is equal to 'pivot', then l[pivot] is the kth element.
            # Otherwise continue in the side that is guaranteed to contain
            # index k.
            if k == pivot:
                break
            if k < pivot:
                right = pivot - 1
            else:
                left = pivot + 1

        # Either the pivot landed on k, or the window collapsed onto k.
        return l[k]

    def partition(self, d: dict[int, int], l: list[int], left: int, right: int) -> int:
        """
        Reorder `l[left:right + 1]` such that elements more frequent than the
        pivot come before elements less than or equal to the pivot. When
        complete, the pivot is in its final sorted position and its index is
        returned. The pivot is chosen as the last element in the partition
        (Lomuto partition scheme).
        """
        # Choose the last element (right) as the pivot.
        pivot = l[right]
        pivot_freq = d[pivot]

        # i (commonly referred to as the "store index") marks where the pivot
        # will finally be placed. j scans the list from left to right - 1.
        #
        # The loop maintains the following invariant:
        #
        #   Elements left through i-1 (inclusive) are >  pivot
        #   Elements i    through j-1 (inclusive) are <= pivot
        i = left
        for j in range(left, right):
            if d[l[j]] > pivot_freq:
                l[i], l[j] = l[j], l[i]
                i += 1

        # As a final step, move the pivot to its final position. This will be
        # its position in the fully sorted (descending) array.
        l[i], l[right] = l[right], l[i]

        # Return the index of the pivot. The pivot index is used to determine
        # the new left and right bounds for quickselect.
        return i
0 commit comments