@@ -89,6 +89,7 @@ class tdigest {
89
89
using vector_t = std::vector<T, Allocator>; // vector of T using the class's Allocator
90
90
using vector_centroid = std::vector<centroid, typename std::allocator_traits<Allocator>::template rebind_alloc<centroid>>; // vector of centroid, with Allocator rebound to centroid
91
91
using vector_bytes = std::vector<uint8_t , typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t >>; // vector of uint8_t, with Allocator rebound to uint8_t
92
+ using vector_double = std::vector<double , typename std::allocator_traits<Allocator>::template rebind_alloc<double >>; // vector of double, with Allocator rebound to double
92
93
93
94
struct centroid_cmp {
94
95
centroid_cmp () {}
@@ -142,20 +143,67 @@ class tdigest {
142
143
*/
143
144
uint64_t get_total_weight () const ;
144
145
146
+ /* *
147
+ * Returns an instance of the allocator for this t-Digest.
148
+ * @return allocator instance, returned by value
149
+ */
150
+ Allocator get_allocator () const ;
151
+
145
152
/* *
146
153
* Compute the approximate normalized rank of the given value.
154
+ *
155
+ * <p>If the sketch is empty this throws std::runtime_error.
156
+ *
147
157
* @param value the value to be ranked
148
158
* @return approximate normalized rank (from 0 to 1 inclusive)
149
159
*/
150
160
double get_rank (T value) const ;
151
161
152
162
/* *
153
163
* Compute the approximate quantile value corresponding to the given normalized rank.
164
+ *
165
+ * <p>If the sketch is empty this throws std::runtime_error.
166
+ *
154
167
* @param rank normalized rank (from 0 to 1 inclusive)
155
168
* @return approximate quantile value corresponding to the given rank
156
169
*/
157
170
T get_quantile (double rank) const ;
158
171
172
+ /* *
173
+ * Returns an approximation to the Probability Mass Function (PMF) of the input stream
174
+ * given a set of split points.
175
+ *
176
+ * <p>If the sketch is empty this throws std::runtime_error.
177
+ *
178
+ * @param split_points an array of <i>m</i> unique, monotonically increasing values
179
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
180
+ *
181
+ * @param size the number of split points in the array (<i>m</i>)
182
+ *
183
+ * @return a vector of <i>m+1</i> doubles, each of which is an approximation
184
+ * to the fraction of the input stream values (the mass) that fall into one of those intervals.
185
+ */
186
+ vector_double get_PMF (const T* split_points, uint32_t size) const ;
187
+
188
+ /* *
189
+ * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
190
+ * cumulative analog of the PMF, of the input stream given a set of split points.
191
+ *
192
+ * <p>If the sketch is empty this throws std::runtime_error.
193
+ *
194
+ * @param split_points an array of <i>m</i> unique, monotonically increasing values
195
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
196
+ *
197
+ * @param size the number of split points in the array (<i>m</i>)
198
+ *
199
+ * @return a vector of <i>m+1</i> doubles, which are a consecutive approximation to the CDF
200
+ * of the input stream given the split_points. The value at array position j of the returned
201
+ * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
202
+ * array. This can be viewed as an array of ranks of the given split points plus one more value
203
+ * that is always 1.
204
+ */
205
+ vector_double get_CDF (const T* split_points, uint32_t size) const ;
206
+
159
207
/* *
160
208
* @return parameter k (compression) that was used to configure this t-Digest
161
209
*/
@@ -245,6 +293,8 @@ class tdigest {
245
293
// Deserialization for compatibility with the format of the reference implementation
246
294
static tdigest deserialize_compat (std::istream& is, const Allocator& allocator = Allocator()); // overload taking an input stream
247
295
static tdigest deserialize_compat (const void * bytes, size_t size, const Allocator& allocator = Allocator()); // overload taking a memory buffer (size presumably in bytes — confirm)
296
+
297
+ static inline void check_split_points (const T* values, uint32_t size); // NOTE(review): presumably validates split points given to get_PMF/get_CDF (unique, monotonically increasing) — confirm against the definition
248
298
};
249
299
250
300
} /* namespace datasketches */
0 commit comments