Skip to content

Commit 597fbea

Browse files
juwentus1234 and facebook-github-bot
authored and committed
feat: Add estimateFlatSize Benchmark (facebookincubator#13971)
Summary: Pull Request resolved: facebookincubator#13971. Add a benchmark for the estimateFlatSize function. bypass-github-export-checks. Reviewed By: xiaoxmeng. Differential Revision: D77612228. fbshipit-source-id: 4f79bd6684633ffc9eab6199a0d8c0a51026c114
1 parent bd5f771 commit 597fbea

File tree

2 files changed

+234
-0
lines changed

2 files changed

+234
-0
lines changed

velox/benchmarks/basic/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ add_executable(velox_benchmark_basic_decoded_vector DecodedVector.cpp)
4444
target_link_libraries(
4545
velox_benchmark_basic_decoded_vector ${velox_benchmark_deps})
4646

47+
# Benchmark executable for estimateFlatSize, built from
# EstimateFlatSizeBenchmark.cpp and linked against the same
# ${velox_benchmark_deps} list as the neighbouring benchmark targets.
# NOTE(review): the surrounding targets are named velox_benchmark_basic_*;
# confirm whether this target is meant to omit the "basic_" prefix.
add_executable(velox_benchmark_estimate_flat_size EstimateFlatSizeBenchmark.cpp)
48+
target_link_libraries(
49+
velox_benchmark_estimate_flat_size ${velox_benchmark_deps})
50+
4751
add_executable(velox_benchmark_basic_selectivity_vector SelectivityVector.cpp)
4852
target_link_libraries(
4953
velox_benchmark_basic_selectivity_vector ${velox_benchmark_deps})
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <folly/Benchmark.h>
18+
#include <folly/init/Init.h>
19+
#include <gflags/gflags.h>
20+
21+
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h"
22+
#include "velox/vector/fuzzer/VectorFuzzer.h"
23+
24+
// Command-line knobs for the benchmark inputs.
// Seed passed to the VectorFuzzer that generates every input vector.
DEFINE_int64(fuzzer_seed, 99887766, "Seed for random input dataset generator");
25+
// Forwarded to the fixture constructor as the vector size used for the
// fuzzer options and for the explicitly sized row/array/map children.
DEFINE_int64(vector_size, 10000, "Size of vectors to benchmark");
26+
// Forwarded to the fixture constructor as the number of children of the
// top-level row vector.
DEFINE_int64(row_children, 1000, "Number of children in row vector");
27+
// Intended depth of dictionary wrapping for the nested-dictionary input.
DEFINE_int64(dict_nesting, 5, "Number of dictionary nesting levels");
28+
29+
using namespace facebook::velox;
30+
using namespace facebook::velox::test;
31+
32+
namespace {
33+
34+
class EstimateFlatSizeBenchmark
35+
: public functions::test::FunctionBenchmarkBase {
36+
public:
37+
EstimateFlatSizeBenchmark(size_t vectorSize, size_t rowChildren)
38+
: FunctionBenchmarkBase(),
39+
vectorSize_(vectorSize),
40+
numRowChildren_(rowChildren) {
41+
VectorFuzzer::Options opts;
42+
opts.vectorSize = vectorSize_;
43+
opts.nullRatio = 0;
44+
opts.stringLength = 10;
45+
VectorFuzzer fuzzer(opts, pool(), FLAGS_fuzzer_seed);
46+
47+
// Create flat vectors of different types
48+
flatBigintVector_ = fuzzer.fuzzFlat(BIGINT());
49+
flatVarcharVector_ = fuzzer.fuzzFlat(VARCHAR());
50+
51+
// Create constant vectors
52+
constantBigintVector_ = fuzzer.fuzzConstant(BIGINT());
53+
constantVarcharVector_ = fuzzer.fuzzConstant(VARCHAR());
54+
55+
// Create dictionary vectors
56+
dictionaryBigintVector_ = fuzzer.fuzzDictionary(fuzzer.fuzzFlat(BIGINT()));
57+
dictionaryVarcharVector_ =
58+
fuzzer.fuzzDictionary(fuzzer.fuzzFlat(VARCHAR()));
59+
60+
// Create nested dictionary vector
61+
nestedDictionaryVector_ = fuzzer.fuzzFlat(BIGINT());
62+
for (size_t i = 0; i < 5; ++i) {
63+
nestedDictionaryVector_ = fuzzer.fuzzDictionary(nestedDictionaryVector_);
64+
}
65+
66+
// Create a nested row vector with complex children
67+
std::vector<std::string> names;
68+
std::vector<TypePtr> types;
69+
std::vector<VectorPtr> children;
70+
71+
// Create a mix of different types of children
72+
for (size_t i = 0; i < numRowChildren_; ++i) {
73+
names.push_back(fmt::format("field{}", i));
74+
75+
// Create different types of children based on the index
76+
switch (i % 5) {
77+
case 0: {
78+
// Flat vector
79+
types.push_back(BIGINT());
80+
children.push_back(fuzzer.fuzzFlat(BIGINT()));
81+
break;
82+
}
83+
case 1: {
84+
// Dictionary vector
85+
types.push_back(VARCHAR());
86+
children.push_back(fuzzer.fuzzDictionary(fuzzer.fuzzFlat(VARCHAR())));
87+
break;
88+
}
89+
case 2: {
90+
// Nested row vector
91+
std::vector<std::string> nestedNames = {
92+
"nested1", "nested2", "nested3"};
93+
std::vector<TypePtr> nestedTypes = {BIGINT(), VARCHAR(), DOUBLE()};
94+
auto nestedRowType =
95+
ROW(std::move(nestedNames), std::move(nestedTypes));
96+
types.push_back(nestedRowType);
97+
children.push_back(fuzzer.fuzzRow(nestedRowType, vectorSize_));
98+
break;
99+
}
100+
case 3: {
101+
// Array vector
102+
auto arrayType = ARRAY(BIGINT());
103+
types.push_back(arrayType);
104+
children.push_back(fuzzer.fuzzArray(BIGINT(), vectorSize_));
105+
break;
106+
}
107+
case 4: {
108+
// Map vector
109+
auto mapType = MAP(VARCHAR(), BIGINT());
110+
types.push_back(mapType);
111+
children.push_back(fuzzer.fuzzMap(VARCHAR(), BIGINT(), vectorSize_));
112+
break;
113+
}
114+
}
115+
}
116+
117+
auto rowType = ROW(std::move(names), std::move(types));
118+
rowVector_ = std::make_shared<RowVector>(
119+
pool(), rowType, nullptr, vectorSize_, std::move(children));
120+
}
121+
122+
// Benchmark methods for estimateFlatSize
123+
void estimateFlatSizeFlatBigint() {
124+
auto size = flatBigintVector_->estimateFlatSize();
125+
folly::doNotOptimizeAway(size);
126+
}
127+
128+
void estimateFlatSizeFlatVarchar() {
129+
auto size = flatVarcharVector_->estimateFlatSize();
130+
folly::doNotOptimizeAway(size);
131+
}
132+
133+
void estimateFlatSizeConstantBigint() {
134+
auto size = constantBigintVector_->estimateFlatSize();
135+
folly::doNotOptimizeAway(size);
136+
}
137+
138+
void estimateFlatSizeConstantVarchar() {
139+
auto size = constantVarcharVector_->estimateFlatSize();
140+
folly::doNotOptimizeAway(size);
141+
}
142+
143+
void estimateFlatSizeDictionaryBigint() {
144+
auto size = dictionaryBigintVector_->estimateFlatSize();
145+
folly::doNotOptimizeAway(size);
146+
}
147+
148+
void estimateFlatSizeDictionaryVarchar() {
149+
auto size = dictionaryVarcharVector_->estimateFlatSize();
150+
folly::doNotOptimizeAway(size);
151+
}
152+
153+
void estimateFlatSizeNestedDictionary() {
154+
auto size = nestedDictionaryVector_->estimateFlatSize();
155+
folly::doNotOptimizeAway(size);
156+
}
157+
158+
void estimateFlatSizeRowVector() {
159+
auto size = rowVector_->estimateFlatSize();
160+
folly::doNotOptimizeAway(size);
161+
}
162+
163+
private:
164+
const size_t vectorSize_;
165+
const size_t numRowChildren_;
166+
167+
VectorPtr flatBigintVector_;
168+
VectorPtr flatVarcharVector_;
169+
VectorPtr constantBigintVector_;
170+
VectorPtr constantVarcharVector_;
171+
VectorPtr dictionaryBigintVector_;
172+
VectorPtr dictionaryVarcharVector_;
173+
VectorPtr nestedDictionaryVector_;
174+
RowVectorPtr rowVector_;
175+
};
176+
177+
// Fixture shared by all BENCHMARK bodies below. It is created in main() after
// the command-line flags have been parsed and reset there once
// folly::runBenchmarks() returns.
std::unique_ptr<EstimateFlatSizeBenchmark> benchmark;
178+
179+
/// Calls `func` back-to-back `iterations` times.
///
/// @param func Callable invoked with no arguments; its result, if any, is
/// discarded.
/// @param iterations Number of invocations; 0 means `func` is never called.
template <typename Func>
void run(Func&& func, size_t iterations = 100) {
  // The counter has the same unsigned type as the bound, so the comparison is
  // not a signed/unsigned mix and cannot overflow for large counts.
  for (size_t i = 0; i < iterations; ++i) {
    func();
  }
}
185+
186+
// Flat BIGINT input; the call is repeated run()'s default number of times.
BENCHMARK(estimateFlatSizeFlatBigint) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeFlatBigint(); });
}
189+
190+
// Flat VARCHAR input; the call is repeated run()'s default number of times.
BENCHMARK(estimateFlatSizeFlatVarchar) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeFlatVarchar(); });
}
193+
194+
// Constant BIGINT input; the call is repeated run()'s default number of times.
BENCHMARK(estimateFlatSizeConstantBigint) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeConstantBigint(); });
}
197+
198+
// Constant VARCHAR input; the call is repeated run()'s default number of
// times.
BENCHMARK(estimateFlatSizeConstantVarchar) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeConstantVarchar(); });
}
201+
202+
// Dictionary-over-BIGINT input; the call is repeated run()'s default number of
// times.
BENCHMARK(estimateFlatSizeDictionaryBigint) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeDictionaryBigint(); });
}
205+
206+
// Dictionary-over-VARCHAR input; the call is repeated run()'s default number
// of times.
BENCHMARK(estimateFlatSizeDictionaryVarchar) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeDictionaryVarchar(); });
}
209+
210+
// Multi-level dictionary input; the call is repeated run()'s default number of
// times.
BENCHMARK(estimateFlatSizeNestedDictionary) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeNestedDictionary(); });
}
213+
214+
// Wide row-vector input; the call is repeated run()'s default number of times.
BENCHMARK(estimateFlatSizeRowVector) {
  auto* const fixture = benchmark.get();
  run([fixture] { fixture->estimateFlatSizeRowVector(); });
}
217+
218+
} // namespace
219+
220+
int main(int argc, char* argv[]) {
221+
folly::Init init{&argc, &argv};
222+
::gflags::ParseCommandLineFlags(&argc, &argv, true);
223+
memory::MemoryManager::initialize(memory::MemoryManager::Options{});
224+
225+
benchmark = std::make_unique<EstimateFlatSizeBenchmark>(
226+
FLAGS_vector_size, FLAGS_row_children);
227+
folly::runBenchmarks();
228+
benchmark.reset();
229+
return 0;
230+
}

0 commit comments

Comments
 (0)