-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathFindMedianFromDataStream.java
385 lines (341 loc) · 12.5 KB
/
FindMedianFromDataStream.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
package LeetCodeJava.Heap;
// https://leetcode.com/problems/find-median-from-data-stream/?envType=list&envId=xoqag3yj
import java.util.*;
/**
* 295. Find Median from Data Stream
* Hard
* Topics
* Companies
* The median is the middle value in an ordered integer list. If the size of the list is even, there is no middle value, and the median is the mean of the two middle values.
*
* For example, for arr = [2,3,4], the median is 3.
* For example, for arr = [2,3], the median is (2 + 3) / 2 = 2.5.
* Implement the MedianFinder class:
*
* MedianFinder() initializes the MedianFinder object.
* void addNum(int num) adds the integer num from the data stream to the data structure.
* double findMedian() returns the median of all elements so far. Answers within 10-5 of the actual answer will be accepted.
*
*
* Example 1:
*
* Input
* ["MedianFinder", "addNum", "addNum", "findMedian", "addNum", "findMedian"]
* [[], [1], [2], [], [3], []]
* Output
* [null, null, null, 1.5, null, 2.0]
*
* Explanation
* MedianFinder medianFinder = new MedianFinder();
* medianFinder.addNum(1); // arr = [1]
* medianFinder.addNum(2); // arr = [1, 2]
* medianFinder.findMedian(); // return 1.5 (i.e., (1 + 2) / 2)
* medianFinder.addNum(3); // arr[1, 2, 3]
* medianFinder.findMedian(); // return 2.0
*
*
* Constraints:
*
* -105 <= num <= 105
* There will be at least one element in the data structure before calling findMedian.
* At most 5 * 104 calls will be made to addNum and findMedian.
*
*
* Follow up:
*
* If all integer numbers from the stream are in the range [0, 100], how would you optimize your solution?
* If 99% of all integer numbers from the stream are in the range [0, 100], how would you optimize your solution?
*
*
*/
public class FindMedianFromDataStream {
// V0
// IDEA: SMALL, BIG PQ (fixed by gpt) + `rebalance` PQ
class MedianFinder {
/**
* - small_pq: A max-heap (stores the `smaller` half of the numbers).
* The root will always be the largest number
* in the smaller half.
*
* - big_pq: A min-heap (stores the `larger` half of the numbers).
* The root will always be the smallest
* number in the larger half.
*
*
*
* -> NOTE !!!
*
* small_pq : MAX PQ
* big_pq : MIN PQ
*
*/
PriorityQueue<Integer> small_pq; // max-heap (stores smaller half)
PriorityQueue<Integer> big_pq; // min-heap (stores larger half)
public MedianFinder() {
// Initialize small_pq as a max-heap
this.small_pq = new PriorityQueue<>(Comparator.reverseOrder());
// Initialize big_pq as a min-heap (default behavior)
this.big_pq = new PriorityQueue<>();
}
public void addNum(int num) {
// Add the new number to the appropriate heap
if (this.small_pq.isEmpty() || num <= this.small_pq.peek()) {
this.small_pq.add(num);
} else {
this.big_pq.add(num);
}
/**
* NOTE !!!
*
* The crucial part is to maintain the `balance`
* between the two heaps so that they
* `have roughly the SAME NUMBER of elements.`
*
*
* -> so in `rebalance` code below,
* -> we try to keep `size difference` <= 1
*
*/
// Rebalance the heaps to maintain a size difference of at most 1
if (this.small_pq.size() > this.big_pq.size() + 1) {
this.big_pq.add(this.small_pq.poll());
} else if (this.big_pq.size() > this.small_pq.size() + 1) {
this.small_pq.add(this.big_pq.poll());
}
}
public double findMedian() {
/**
* to find the median:
*
* - If the total number of elements is `odd`,
* the median is the root of the larger heap
* (which will have one more element).
*
*
* - If the total number of elements is `even`,
* the median is the average of the roots of both heaps.
*
*/
int size = this.small_pq.size() + this.big_pq.size();
if (size % 2 == 1) {
// Odd number of elements, median is the top of the larger heap
if (this.small_pq.size() > this.big_pq.size()) {
return this.small_pq.peek();
} else {
return this.big_pq.peek();
}
} else {
// Even number of elements, median is the average of the top of both heaps
return (this.small_pq.peek() + this.big_pq.peek()) / 2.0;
}
}
}
// V0-1
// IDEA: SORTING (TLE)
class MedianFinder_0_1 {
// attr
List<Integer> collected;
int cnt;
public MedianFinder_0_1() {
this.cnt = 0;
this.collected = new ArrayList<>();
}
public void addNum(int num) {
this.collected.add(num);
// sort (increasing) (small -> big)
Collections.sort(this.collected, new Comparator<Integer>() {
@Override
public int compare(Integer o1, Integer o2) {
int diff = o1 - o2;
return diff;
}
});
this.cnt += 1;
}
public double findMedian() {
if (this.cnt == 0 || this.collected == null) {
return 0;
}
if (this.cnt == 1) {
return this.collected.get(0);
}
/**
* if cnt is odd, [1,2,3]
* if cnt is even, [1,2,3,4]
*/
// System.out.println(">>> this.cnt= " + this.cnt + ", this.collected = " +
// this.collected);
/**
* NOTE !!!
*
* be careful on which val we use:
*
* this.cnt VS array's leftmost idx
*
*/
int midIdx = -1;
if (this.cnt % 2 == 0) {
midIdx = this.cnt / 2;
return (this.collected.get(midIdx) + this.collected.get(midIdx - 1)) / 2.0;
} else {
// midIdx = this.cnt / 2;
return this.collected.get(this.cnt / 2);
}
}
}
// V1-1
// https://neetcode.io/problems/find-median-in-a-data-stream
// IDEA: SORTING
public class MedianFinder_1_1 {
private ArrayList<Integer> data;
public MedianFinder_1_1() {
data = new ArrayList<>();
}
public void addNum(int num) {
data.add(num);
}
public double findMedian() {
Collections.sort(data);
int n = data.size();
if ((n & 1) == 1) {
return data.get(n / 2);
} else {
return (data.get(n / 2) + data.get(n / 2 - 1)) / 2.0;
}
}
}
// V1-2
// https://neetcode.io/problems/find-median-in-a-data-stream
// IDEA: HEAP
/**
* Time Complexity:
* - addNum(int num):
* The time complexity of adding an element is O(log N)
* due to the heap operations (insert and balance).
*
* - findMedian():
* The time complexity is O(1) because the median is
* always the root of one or both heaps, which can be accessed in constant time.
*
*/
public class MedianFinder_1_2 {
private Queue<Integer> smallHeap; // small elements - maxHeap
private Queue<Integer> largeHeap; // large elements - minHeap
public MedianFinder_1_2() {
smallHeap = new PriorityQueue<>((a, b) -> b - a);
largeHeap = new PriorityQueue<>((a, b) -> a - b);
}
/**
* Steps
*
* step 1) First, the number is added to the smallHeap (max-heap).
*
* step 2) Then, the method checks if the size of smallHeap exceeds
* the size of largeHeap by more than 1, or if the root of
* smallHeap (the largest element in the smaller half) is greater than
* the root of largeHeap (the smallest element in the larger half).
* If either condition is true, the root of smallHeap is moved to largeHeap.
*
* step 3) If the size of largeHeap exceeds the size of smallHeap by more than 1,
* the root of largeHeap (the smallest element
* in the larger half) is moved to smallHeap.
*
*
* step 4) This balancing ensures that the two heaps are either of
* the same size or smallHeap has one extra element than largeHeap.
*
*/
public void addNum(int num) {
smallHeap.add(num);
if (smallHeap.size() - largeHeap.size() > 1 ||
!largeHeap.isEmpty() &&
smallHeap.peek() > largeHeap.peek()) {
largeHeap.add(smallHeap.poll());
}
if (largeHeap.size() - smallHeap.size() > 1) {
smallHeap.add(largeHeap.poll());
}
}
/**
* Steps
*
* This method returns the median of all the elements in the data structure.
*
* step 1) If both heaps are of the same size, the median
* is the average of the roots of both heaps
* (since the heaps are balanced).
*
* step 2) If smallHeap has more elements than largeHeap
* (i.e., smallHeap has one extra element),
* the median is the root of smallHeap.
*
* step 3) If largeHeap has more elements
* (which can happen, but it shouldn't happen frequently due to balancing),
* the median is the root of largeHeap.
*
*/
public double findMedian() {
if (smallHeap.size() == largeHeap.size()) {
return (double) (largeHeap.peek() + smallHeap.peek()) / 2;
} else if (smallHeap.size() > largeHeap.size()) {
return (double) smallHeap.peek();
} else {
return (double) largeHeap.peek();
}
}
}
// V-2
// https://leetcode.com/problems/find-median-from-data-stream/solutions/74047/javapython-two-heap-solution-olog-n-add-gh8zw/
// IDEA: HEAP
class MedianFinder_2 {
private PriorityQueue<Integer> small = new PriorityQueue<>(Collections.reverseOrder());
private PriorityQueue<Integer> large = new PriorityQueue<>();
private boolean even = true;
public double MedianFinder_2() {
if (even)
return (small.peek() + large.peek()) / 2.0;
else
return small.peek();
}
public void addNum(int num) {
if (even) {
large.offer(num);
small.offer(large.poll());
} else {
small.offer(num);
large.offer(small.poll());
}
even = !even;
}
}
// V3
// https://leetcode.com/problems/find-median-from-data-stream/solutions/6446018/java-solutionfind-median-from-data-strea-mcjz/
// IDEA: HEAP
class MedianFinder_3 {
PriorityQueue<Integer> maxHeap;
PriorityQueue<Integer> minHeap;
public MedianFinder_3() {
maxHeap = new PriorityQueue<>(Collections.reverseOrder()); // . we need to store the elements in decreasing
// order so we use reverse order.
minHeap = new PriorityQueue<>();
}
public void addNum(int num) {
maxHeap.offer(num);
if (minHeap.size() > 0 && maxHeap.peek() > minHeap.peek()) {
minHeap.offer(maxHeap.poll());
}
if (maxHeap.size() - minHeap.size() >= 2) {
minHeap.offer(maxHeap.poll());
} else if (minHeap.size() > maxHeap.size()) {
maxHeap.offer(minHeap.poll());
}
}
public double findMedian() {
if (maxHeap.size() == minHeap.size()) {
return (maxHeap.peek() + minHeap.peek()) / 2.0;
} else {
return maxHeap.peek();
}
}
}
}