Skip to content

Commit 18e315b

Browse files
authored
Merge pull request #7 from nsone/3.0.6
3.0.6
2 parents 0f2c6da + de9b6ed commit 18e315b

File tree

17 files changed

+437
-108
lines changed

17 files changed

+437
-108
lines changed

3rd/datasketches/datasketches/cpc/cpc_sketch_impl.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,9 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
588588

589589
template<typename A>
590590
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
591+
ensure_minimum_memory(size, 8);
591592
const char* ptr = static_cast<const char*>(bytes);
593+
const char* base = static_cast<const char*>(bytes);
592594
uint8_t preamble_ints;
593595
ptr += copy_from_mem(ptr, &preamble_ints, sizeof(preamble_ints));
594596
uint8_t serial_version;
@@ -606,6 +608,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
606608
const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
607609
const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
608610
const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
611+
ensure_minimum_memory(size, preamble_ints << 2);
609612
compressed_state<A> compressed;
610613
compressed.table_data_words = 0;
611614
compressed.table_num_entries = 0;
@@ -614,30 +617,38 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
614617
double kxp = 0;
615618
double hip_est_accum = 0;
616619
if (has_table || has_window) {
620+
check_memory_size(ptr - base + sizeof(num_coupons), size);
617621
ptr += copy_from_mem(ptr, &num_coupons, sizeof(num_coupons));
618622
if (has_table && has_window) {
623+
check_memory_size(ptr - base + sizeof(compressed.table_num_entries), size);
619624
ptr += copy_from_mem(ptr, &compressed.table_num_entries, sizeof(compressed.table_num_entries));
620625
if (has_hip) {
626+
check_memory_size(ptr - base + sizeof(kxp) + sizeof(hip_est_accum), size);
621627
ptr += copy_from_mem(ptr, &kxp, sizeof(kxp));
622628
ptr += copy_from_mem(ptr, &hip_est_accum, sizeof(hip_est_accum));
623629
}
624630
}
625631
if (has_table) {
632+
check_memory_size(ptr - base + sizeof(compressed.table_data_words), size);
626633
ptr += copy_from_mem(ptr, &compressed.table_data_words, sizeof(compressed.table_data_words));
627634
}
628635
if (has_window) {
636+
check_memory_size(ptr - base + sizeof(compressed.window_data_words), size);
629637
ptr += copy_from_mem(ptr, &compressed.window_data_words, sizeof(compressed.window_data_words));
630638
}
631639
if (has_hip && !(has_table && has_window)) {
640+
check_memory_size(ptr - base + sizeof(kxp) + sizeof(hip_est_accum), size);
632641
ptr += copy_from_mem(ptr, &kxp, sizeof(kxp));
633642
ptr += copy_from_mem(ptr, &hip_est_accum, sizeof(hip_est_accum));
634643
}
635644
if (has_window) {
636645
compressed.window_data.resize(compressed.window_data_words);
646+
check_memory_size(ptr - base + sizeof(compressed.window_data_words) + sizeof(uint32_t), size);
637647
ptr += copy_from_mem(ptr, compressed.window_data.data(), compressed.window_data_words * sizeof(uint32_t));
638648
}
639649
if (has_table) {
640650
compressed.table_data.resize(compressed.table_data_words);
651+
check_memory_size(ptr - base + sizeof(compressed.table_data_words), size);
641652
ptr += copy_from_mem(ptr, compressed.table_data.data(), compressed.table_data_words * sizeof(uint32_t));
642653
}
643654
if (!has_window) compressed.table_num_entries = num_coupons;

3rd/datasketches/datasketches/cpc/cpc_union_impl.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ template<typename A>
2828
cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
2929
lg_k(lg_k),
3030
seed(seed),
31-
accumulator(new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed)),
31+
accumulator(nullptr),
3232
bit_matrix()
3333
{
3434
if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
3535
throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
3636
}
37+
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
3738
}
3839

3940
template<typename A>

3rd/datasketches/datasketches/fi/frequent_items_sketch_impl.hpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#include <cstring>
2525
#include <limits>
2626

27+
#include "memory_operations.hpp"
28+
2729
namespace datasketches {
2830

2931
// clang++ seems to require this declaration for CMAKE_BUILD_TYPE='Debug'
@@ -209,6 +211,7 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
209211
const size_t size = header_size_bytes + get_serialized_size_bytes();
210212
vector_u8<A> bytes(size);
211213
uint8_t* ptr = bytes.data() + header_size_bytes;
214+
uint8_t* end_ptr = ptr + size;
212215

213216
const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
214217
ptr += copy_to_mem(&preamble_longs, ptr, sizeof(uint8_t));
@@ -245,7 +248,8 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_
245248
}
246249
ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
247250
AllocW().deallocate(weights, num_items);
248-
ptr += S().serialize(ptr, items, num_items);
251+
const size_t bytes_remaining = end_ptr - ptr;
252+
ptr += S().serialize(ptr, bytes_remaining, items, num_items);
249253
for (unsigned i = 0; i < num_items; i++) items[i].~T();
250254
A().deallocate(items, num_items);
251255
}
@@ -276,7 +280,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
276280
check_family_id(family_id);
277281
check_size(lg_cur_size, lg_max_size);
278282

279-
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_cur_size, lg_max_size);
283+
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
280284
if (!is_empty) {
281285
uint32_t num_items;
282286
is.read((char*)&num_items, sizeof(num_items));
@@ -308,7 +312,9 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
308312

309313
template<typename T, typename W, typename H, typename E, typename S, typename A>
310314
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
315+
ensure_minimum_memory(size, 8);
311316
const char* ptr = static_cast<const char*>(bytes);
317+
const char* base = static_cast<const char*>(bytes);
312318
uint8_t preamble_longs;
313319
ptr += copy_from_mem(ptr, &preamble_longs, sizeof(uint8_t));
314320
uint8_t serial_version;
@@ -330,8 +336,9 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
330336
check_serial_version(serial_version);
331337
check_family_id(family_id);
332338
check_size(lg_cur_size, lg_max_size);
339+
ensure_minimum_memory(size, 1 << preamble_longs);
333340

334-
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_cur_size, lg_max_size);
341+
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
335342
if (!is_empty) {
336343
uint32_t num_items;
337344
ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
@@ -345,9 +352,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
345352
// batch deserialization with intermediate array of items and weights
346353
typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
347354
W* weights = AllocW().allocate(num_items);
355+
ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
348356
ptr += copy_from_mem(ptr, weights, sizeof(W) * num_items);
349357
T* items = A().allocate(num_items);
350-
ptr += S().deserialize(ptr, items, num_items);
358+
const size_t bytes_remaining = size - (ptr - base);
359+
ptr += S().deserialize(ptr, bytes_remaining, items, num_items);
351360
for (uint32_t i = 0; i < num_items; i++) {
352361
sketch.update(std::move(items[i]), weights[i]);
353362
items[i].~T();
@@ -436,7 +445,7 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW weight) {
436445
// version for integral unsigned type - no-op
437446
template<typename T, typename W, typename H, typename E, typename S, typename A>
438447
template<typename WW, typename std::enable_if<std::is_integral<WW>::value && std::is_unsigned<WW>::value, int>::type>
439-
void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW weight) {}
448+
void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW) {}
440449

441450
// version for floating point type
442451
template<typename T, typename W, typename H, typename E, typename S, typename A>

3rd/datasketches/datasketches/fi/reverse_purge_hash_map_impl.hpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include <iterator>
2626
#include <cmath>
2727

28+
#include "MurmurHash3.h"
29+
2830
namespace datasketches {
2931

3032
// clang++ seems to require this declaration for CMAKE_BUILD_TYPE='Debug'
@@ -86,13 +88,18 @@ reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() {
8688
const uint32_t size = 1 << lg_cur_size;
8789
if (num_active > 0) {
8890
for (uint32_t i = 0; i < size; i++) {
89-
if (is_active(i)) keys[i].~K();
90-
if (--num_active == 0) break;
91+
if (is_active(i)) {
92+
keys[i].~K();
93+
if (--num_active == 0) break;
94+
}
9195
}
9296
}
93-
A().deallocate(keys, size);
94-
AllocV().deallocate(values, size);
95-
AllocU16().deallocate(states, size);
97+
if (keys != nullptr)
98+
A().deallocate(keys, size);
99+
if (values != nullptr)
100+
AllocV().deallocate(values, size);
101+
if (states != nullptr)
102+
AllocU16().deallocate(states, size);
96103
}
97104

98105
template<typename K, typename V, typename H, typename E, typename A>
@@ -142,7 +149,7 @@ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) {
142149
template<typename K, typename V, typename H, typename E, typename A>
143150
V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const {
144151
const uint32_t mask = (1 << lg_cur_size) - 1;
145-
uint32_t probe = H()(key) & mask;
152+
uint32_t probe = fmix64(H()(key)) & mask;
146153
while (is_active(probe)) {
147154
if (E()(keys[probe], key)) return values[probe];
148155
probe = (probe + 1) & mask;
@@ -251,7 +258,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
251258
template<typename K, typename V, typename H, typename E, typename A>
252259
uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) {
253260
const uint32_t mask = (1 << lg_cur_size) - 1;
254-
uint32_t index = H()(key) & mask;
261+
uint32_t index = fmix64(H()(key)) & mask;
255262
uint16_t drift = 1;
256263
while (is_active(index)) {
257264
if (E()(keys[index], key)) {
@@ -326,7 +333,7 @@ V reverse_purge_hash_map<K, V, H, E, A>::purge() {
326333
}
327334
i++;
328335
}
329-
std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples - 1]);
336+
std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]);
330337
const V median = samples[num_samples / 2];
331338
AllocV().deallocate(samples, limit);
332339
subtract_and_keep_positive_only(median);

3rd/datasketches/datasketches/kll/kll_helper.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class kll_helper {
6363
if (std::isnan(values[i])) {
6464
throw std::invalid_argument("Values must not be NaN");
6565
}
66-
if ((i < (size - 1)) and !(C()(values[i], values[i + 1]))) {
66+
if ((i < (size - 1)) && !(C()(values[i], values[i + 1]))) {
6767
throw std::invalid_argument("Values must be unique and monotonically increasing");
6868
}
6969
}
@@ -77,7 +77,7 @@ class kll_helper {
7777
static typename std::enable_if<!std::is_floating_point<T>::value, void>::type
7878
validate_values(const T* values, uint32_t size) {
7979
for (uint32_t i = 0; i < size ; i++) {
80-
if ((i < (size - 1)) and !(C()(values[i], values[i + 1]))) {
80+
if ((i < (size - 1)) && !(C()(values[i], values[i + 1]))) {
8181
throw std::invalid_argument("Values must be unique and monotonically increasing");
8282
}
8383
}

3rd/datasketches/datasketches/kll/kll_sketch_impl.hpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <iostream>
2424
#include <iomanip>
2525

26+
#include "memory_operations.hpp"
2627
#include "kll_helper.hpp"
2728

2829
namespace datasketches {
@@ -417,6 +418,7 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
417418
const size_t size = header_size_bytes + get_serialized_size_bytes();
418419
vector_u8<A> bytes(size);
419420
uint8_t* ptr = bytes.data() + header_size_bytes;
421+
uint8_t* end_ptr = ptr + size;
420422
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
421423
ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
422424
const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
@@ -440,10 +442,11 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
440442
ptr += copy_to_mem(&num_levels_, ptr, sizeof(num_levels_));
441443
ptr += copy_to_mem(&unused, ptr, sizeof(unused));
442444
ptr += copy_to_mem(levels_, ptr, sizeof(levels_[0]) * num_levels_);
443-
ptr += S().serialize(ptr, min_value_, 1);
444-
ptr += S().serialize(ptr, max_value_, 1);
445+
ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
446+
ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
445447
}
446-
ptr += S().serialize(ptr, &items_[levels_[0]], get_num_retained());
448+
const size_t bytes_remaining = end_ptr - ptr;
449+
ptr += S().serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
447450
}
448451
const size_t delta = ptr - bytes.data();
449452
if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
@@ -478,6 +481,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
478481

479482
template<typename T, typename C, typename S, typename A>
480483
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size) {
484+
ensure_minimum_memory(size, 8);
481485
const char* ptr = static_cast<const char*>(bytes);
482486
uint8_t preamble_ints;
483487
ptr += copy_from_mem(ptr, &preamble_ints, sizeof(preamble_ints));
@@ -496,6 +500,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
496500
check_preamble_ints(preamble_ints, flags_byte);
497501
check_serial_version(serial_version);
498502
check_family_id(family_id);
503+
ensure_minimum_memory(size, 1 << preamble_ints);
499504

500505
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
501506
return is_empty ? kll_sketch<T, C, S, A>(k) : kll_sketch<T, C, S, A>(k, flags_byte, bytes, size);
@@ -562,12 +567,14 @@ kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint8_t flags_byte, std::istream&
562567

563568
// for deserialization
564569
// the common part of the preamble was read and compatibility checks were done
570+
// we also assume we have already checked that the preamble information fits within the buffer
565571
template<typename T, typename C, typename S, typename A>
566572
kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint8_t flags_byte, const void* bytes, size_t size) {
567573
k_ = k;
568574
m_ = DEFAULT_M;
569575
const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM)); // used in serial version 2
570576
const char* ptr = static_cast<const char*>(bytes) + DATA_START_SINGLE_ITEM;
577+
const char* end_ptr = static_cast<const char*>(bytes) + size;
571578
if (is_single_item) {
572579
n_ = 1;
573580
min_k_ = k_;
@@ -591,13 +598,13 @@ kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint8_t flags_byte, const void* b
591598
min_value_ = A().allocate(1);
592599
max_value_ = A().allocate(1);
593600
if (!is_single_item) {
594-
ptr += S().deserialize(ptr, min_value_, 1);
595-
ptr += S().deserialize(ptr, max_value_, 1);
601+
ptr += S().deserialize(ptr, end_ptr - ptr, min_value_, 1);
602+
ptr += S().deserialize(ptr, end_ptr - ptr, max_value_, 1);
596603
}
597604
items_ = A().allocate(capacity);
598605
items_size_ = capacity;
599606
const auto num_items(levels_[num_levels_] - levels_[0]);
600-
ptr += S().deserialize(ptr, &items_[levels_[0]], num_items);
607+
ptr += S().deserialize(ptr, end_ptr - ptr, &items_[levels_[0]], num_items);
601608
if (is_single_item) {
602609
new (min_value_) T(items_[levels_[0]]);
603610
new (max_value_) T(items_[levels_[0]]);
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#ifndef _MEMORY_CHECKS_HPP_
21+
#define _MEMORY_CHECKS_HPP_
22+
23+
#include <memory>
24+
#include <exception>
25+
#include <iostream>
26+
27+
namespace datasketches {
28+
29+
/**
 * Verifies that a buffer holds at least the required number of bytes.
 *
 * @param bytes_available number of bytes actually present in the buffer
 * @param min_needed minimum number of bytes that must be present
 * @throws std::out_of_range if bytes_available is smaller than min_needed
 */
static inline void ensure_minimum_memory(size_t bytes_available, size_t min_needed) {
  if (bytes_available < min_needed) {
    // Format the sizes as size_t: narrowing them to int would misreport
    // any value above INT_MAX in the diagnostic message.
    throw std::out_of_range("Insufficient buffer size detected: bytes available "
        + std::to_string(bytes_available) + ", minimum needed " + std::to_string(min_needed));
  }
}
35+
36+
/**
 * Verifies that an index (or end offset) does not exceed the buffer capacity.
 *
 * @param requested_index offset, in bytes, that is about to be accessed
 * @param capacity total number of bytes in the buffer
 * @throws std::out_of_range if requested_index is greater than capacity
 */
static inline void check_memory_size(size_t requested_index, size_t capacity) {
  if (requested_index > capacity) {
    // Format the sizes as size_t: narrowing them to int would misreport
    // any value above INT_MAX in the diagnostic message.
    throw std::out_of_range("Attempt to access memory beyond limits: requested index "
        + std::to_string(requested_index) + ", capacity " + std::to_string(capacity));
  }
}
42+
43+
// note: size is in bytes, not items
44+
static inline size_t copy_from_mem(const void* src, void* dst, size_t size) {
45+
memcpy(dst, src, size);
46+
return size;
47+
}
48+
49+
// note: size is in bytes, not items
50+
static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
51+
memcpy(dst, src, size);
52+
return size;
53+
}
54+
55+
} // namespace
56+
57+
#endif // _MEMORY_CHECKS_HPP_

0 commit comments

Comments
 (0)