Skip to content

Commit 11cdc8b

Browse files
[MOD-9685] Introduce SVS Basic Benchmarks (#829)
* initial implementation of training bm introduce BM_VecSimSVSTrain class with 2 methods: Train and TrainAsync add GoogleTest to benchmarks so we can use ASSERT_* API tieredIndexMock: possible to initialize with a specific thread count add train benchmark to CI benchmark dispatcher * make bm_files general for other algs rename svs_training_fp32 -> svs_indices_training_fp32 add to bm_files.sh * replace std::format only supported from gcc13 with ostringstream * format * revert assert * initialize quantBits move svs params init to CreateTieredSVSIndex only 5 iterations * move iteration logic to runTrainBMIteration add compressed index bm * assert depending on HAVE_SVS_LVQ * separate non compression and compression bm * TO REVERT !!! test abort * fix if else * revert timeoutguard changes * don't pause after training to see how it affects performance remove some prints * fix #ifdef HAVE_SVS_LVQ to #if HAVE_SVS_LVQ * use pause timers it's faster * do 3 iter instead of 5 and test if results are stable * use 5 again * fix download all all script * fp16 bm remove 100K * remove 100K from fp32 * increase timeout * try bigger machine * try a bigger machine * try 2 iter move UNIT_AND_ITERATIONS and QUANT_BITS_ARGS to bm_vecsim_basics_Svs * unify bm_training_initialize_fp32.h and bm_training_initialize_fp16.h to bm_training_initialize.h define DATA_TYPE_INDEX_T in bm_svs_training_fp*.cpp remove the 10k and 50k for arm * revert timeout to 10 fix include header for fp16 * move CreateTieredSVSParams and verifyNumThreads to svs params * revert increase machine size * change assert to log * fix * fix2 * format * introduce bm_svs * add tiered add NewIndex from existing svs to tiered factory implement AddLabel add AddLabelBatches (not implemented) take bm_utils from meiravg_svs_training_bm * introduce setUpdateTriggerThreshold in BUILD_TESTS move initialize index to a function introduce bm function: addlabel: insert one by one AddLabelBatches: add in batches with one thread AddLabelAsync: add in batches 
with multiple threads * fix comment add to yml * remove lock * format * fix num threads in addlabelinplace fix assertupdateTriggerThreshold in AddLabelAsync * use train svs instead * format * small fixes * rename BM_VecSimSVSTrain->BM_VecSimSVS bm_vecsim_svs_train.h->bm_vecsim_svs * remove unrelated * align with new name * revert unnecessary changes in bm_vecsim_index add LVQ BM if HAVE_SVS_LVQ * fix include * fix quantbits * extract general * fix missing main on LVQ cpp * replace vectors file * run only BENCHMARK_MAIN * try dummy for mac * fix DATA_TYPE_INDEX_T definition LVQ * quantBits is now static and needs to be initialized by the CPP file we hard code the name of the data file based on quantBits * TO REVERT: benchmark deletion according to a when we exceed 0.5 index size this benchmark takes 20K ms (20s) for 500 vectors!!! that's a lot after revert - benchmark only gc to determine * REVERT svs.h change consolidation_threshold runGC instead of delete label to not depend on consolidation_threshold that can't be controlled and runs for very, very long! * revert unrelated changes * fix LVQ8 cpp for non LVQ * format * cleanups fix mac * remove new line in cmake * Update tests/benchmark/bm_vecsim_svs.h Co-authored-by: BenGoldberger <[email protected]> --------- Co-authored-by: BenGoldberger <[email protected]>
1 parent 1a15b59 commit 11cdc8b

File tree

13 files changed

+365
-23
lines changed

13 files changed

+365
-23
lines changed

.github/workflows/benchmark.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ on:
4343
- bm-updated-fp32-single
4444
- bm-svs-train-fp32
4545
- bm-svs-train-fp16
46+
- bm-basics-svs-fp32-single
4647
- bm-spaces
4748
description: 'Benchmarks set to run'
4849
default: benchmarks-all

src/VecSim/index_factories/tiered_factory.h

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "VecSim/memory/vecsim_malloc.h"
1414
#include "VecSim/vec_sim_index.h"
1515
#include "VecSim/algorithms/hnsw/hnsw_tiered.h"
16+
#include "VecSim/algorithms/svs/svs_tiered.h"
1617
#include "VecSim/algorithms/brute_force/brute_force.h"
1718
#include "VecSim/index_factories/factory_utils.h"
1819

@@ -58,8 +59,35 @@ VecSimIndex *NewIndex(const TieredIndexParams *params, HNSWIndex<DataType, DistT
5859
// verification of the backend index algorithm. To be removed once a proper verification is
5960
// introduced.
6061
namespace TieredSVSFactory {
61-
BFParams NewBFParams(const TieredIndexParams *params);
62+
63+
#if HAVE_SVS
64+
template <typename DataType>
65+
inline VecSimIndex *NewIndex(const TieredIndexParams *params,
66+
VecSimIndexAbstract<DataType, float> *svs_index) {
67+
// Initialize brute force index.
68+
BFParams bf_params = {.type = svs_index->getType(),
69+
.dim = svs_index->getDim(),
70+
.metric = svs_index->getMetric(),
71+
.multi = svs_index->isMultiValue(),
72+
.blockSize = svs_index->getBlockSize()};
73+
74+
AbstractIndexInitParams abstractInitParams =
75+
VecSimFactory::NewAbstractInitParams(&bf_params, params->primaryIndexParams->logCtx, false);
76+
assert(svs_index->getInputBlobSize() == abstractInitParams.storedDataSize);
77+
assert(svs_index->getStoredDataSize() == abstractInitParams.storedDataSize);
78+
auto frontendIndex = static_cast<BruteForceIndex<DataType, float> *>(
79+
BruteForceFactory::NewIndex(&bf_params, abstractInitParams, false));
80+
81+
// Create new tiered svs index
82+
std::shared_ptr<VecSimAllocator> management_layer_allocator =
83+
VecSimAllocator::newVecsimAllocator();
84+
85+
return new (management_layer_allocator)
86+
TieredSVSIndex<DataType>(svs_index, frontendIndex, *params, management_layer_allocator);
6287
}
6388
#endif
89+
BFParams NewBFParams(const TieredIndexParams *params);
90+
} // namespace TieredSVSFactory
91+
#endif
6492

6593
}; // namespace TieredFactory

tests/benchmark/benchmarks.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ if [ -z "$BM_TYPE" ] || [ "$BM_TYPE" = "benchmarks-all" ]; then
1111
echo updated_index_single_fp32
1212
echo svs_training_fp32
1313
echo svs_training_fp16
14+
echo basics_svs_single_fp32
15+
echo basics_svs_single_fp32_LVQ8
1416
echo spaces_fp32
1517
echo spaces_fp64
1618
echo spaces_bf16
@@ -87,10 +89,14 @@ elif [ "$BM_TYPE" = "bm-batch-iter-uint8-multi" ] ; then
8789
elif [ "$BM_TYPE" = "bm-updated-fp32-single" ] ; then
8890
echo updated_index_single_fp32
8991

92+
# SVS benchmarks
9093
elif [ "$BM_TYPE" = "bm-svs-train-fp32" ] ; then
9194
echo svs_training_fp32
9295
elif [ "$BM_TYPE" = "bm-svs-train-fp16" ] ; then
9396
echo svs_training_fp16
97+
elif [ "$BM_TYPE" = "bm-basics-svs-fp32-single" ] ; then
98+
echo basics_svs_single_fp32
99+
echo basics_svs_single_fp32_LVQ8
94100

95101
# Spaces benchmarks
96102
elif [ "$BM_TYPE" = "bm-spaces" ] ; then

tests/benchmark/bm_files.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ elif [ "$BM_TYPE" = "bm-svs-train-fp32" ] \
4949
then
5050
file_name="training"
5151
alg="svs"
52+
elif [ "$BM_TYPE" = "bm-basics-svs-fp32-single" ]; then
53+
file_name="basic_fp32"
54+
alg="svs"
5255
else
5356
echo "No files to download for BM_TYPE=$BM_TYPE"
5457
exit 0
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright (c) 2006-Present, Redis Ltd.
3+
* All rights reserved.
4+
*
5+
* Licensed under your choice of the Redis Source Available License 2.0
6+
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
7+
* GNU Affero General Public License v3 (AGPLv3).
8+
*/
9+
10+
#pragma once
11+
/**************************************
12+
Define and register tests
13+
NOTE: benchmarks' tests order can affect their results. Please add new benchmarks at the end of
14+
the file.
15+
***************************************/
16+
// Run GC, parameterized by the number of deletions (num_deletions)
17+
BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimSVS, BM_FUNC_NAME(BM_RunGC), DATA_TYPE_INDEX_T)
18+
(benchmark::State &st) { RunGC(st); }
19+
BENCHMARK_REGISTER_F(BM_VecSimSVS, BM_FUNC_NAME(BM_RunGC))
20+
->Unit(benchmark::kMillisecond)
21+
->Iterations(1)
22+
->Arg(50)
23+
->Arg(100)
24+
->Arg(250)
25+
->Arg(500)
26+
->ArgName("num_deletions");
27+
28+
// AddLabel one by one
29+
BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimSVS, BM_FUNC_NAME(BM_AddLabelOneByOne), DATA_TYPE_INDEX_T)
30+
(benchmark::State &st) { AddLabel(st); }
31+
BENCHMARK_REGISTER_F(BM_VecSimSVS, BM_FUNC_NAME(BM_AddLabelOneByOne))
32+
->Unit(benchmark::kMillisecond)
33+
->Iterations(BM_VecSimGeneral::block_size);
34+
35+
// Add vectors in batches via tiered index
36+
BENCHMARK_TEMPLATE_DEFINE_F(BM_VecSimSVS, BM_FUNC_NAME(BM_TriggerUpdateTiered), DATA_TYPE_INDEX_T)
37+
(benchmark::State &st) { TriggerUpdateTiered(st); }
38+
BENCHMARK_REGISTER_F(BM_VecSimSVS, BM_FUNC_NAME(BM_TriggerUpdateTiered))
39+
->Unit(benchmark::kMillisecond)
40+
->Iterations(1)
41+
->ArgsProduct({{static_cast<long int>(BM_VecSimGeneral::block_size), 5000,
42+
static_cast<long int>(10 * BM_VecSimGeneral::block_size)},
43+
{2, 4, 8}})
44+
->ArgNames({"update_threshold", "thread_count"})
45+
->MeasureProcessCPUTime();

tests/benchmark/bm_utils.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ CreateTieredSVSParams(VecSimParams &svs_params, tieredIndexMock &mock_thread_poo
2929

3030
template <typename data_t>
3131
static void verifyNumThreads(TieredSVSIndex<data_t> *tiered_index, size_t expected_num_threads,
32-
size_t expected_capcity) {
32+
size_t expected_capcity, std::string msg = "") {
3333
ASSERT_EQ(tiered_index->GetSVSIndex()->getThreadPoolCapacity(), expected_capcity)
34-
<< "thread pool capacity mismatch";
34+
<< msg << ": thread pool capacity mismatch";
3535
size_t num_reserved_threads = tiered_index->GetSVSIndex()->getNumThreads();
3636
if (num_reserved_threads < expected_num_threads) {
37-
std::cout << "WARNING: last reserved threads (" << num_reserved_threads
37+
std::cout << msg << ": WARNING: last reserved threads (" << num_reserved_threads
3838
<< ") is less than expected (" << expected_num_threads << ")." << std::endl;
3939
}
4040
}

0 commit comments

Comments
 (0)