Skip to content

Commit

Permalink
tests: SegmentBitmapFilterTest supports common handle (#9954)
Browse files Browse the repository at this point in the history
ref #9963

- Make `SegmentBitmapFilterTest` support common handle.
- Introduce parameter `including_right_boundary` in some test utils to support generating test data with `std::numeric_limits<Int64>::max()`.

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
JinheLin and ti-chi-bot[bot] authored Mar 10, 2025
1 parent 3cda2f6 commit a74d83e
Show file tree
Hide file tree
Showing 9 changed files with 864 additions and 295 deletions.
158 changes: 158 additions & 0 deletions dbms/src/Storages/DeltaMerge/VersionChain/ColumnView.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Copyright 2025 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <Storages/DeltaMerge/DeltaMergeHelpers.h>

namespace DB::DM
{
// `ColumnView` is a class that provides unified access to both Int64 handles and String handles.
// Only the `ColumnView<Int64>` and `ColumnView<String>` specializations are usable;
// instantiating the primary template with any other type fails at compile time.
template <typename T>
class ColumnView
{
    // The condition must depend on `T`. A plain `static_assert(false, ...)` is not
    // type-dependent, so compilers that do not implement P2593 reject it as soon as the
    // template is parsed, even if the primary template is never instantiated.
    static_assert(sizeof(T) == 0, "Only support Int64 and String");
};

// Specialization for Int64 handles.
// The view is non-owning: it only stores the reference returned by
// `toColumnVectorData<Int64>(col)` (presumably the column's own storage, so the column
// is expected to outlive the view -- confirm against `toColumnVectorData`).
template <>
class ColumnView<Int64>
{
public:
    // Builds a view over `col`; the Int64 data is obtained through `toColumnVectorData<Int64>`.
    ColumnView(const IColumn & col)
        : data(toColumnVectorData<Int64>(col))
    {}

    // Number of handles visible through the view.
    size_t size() const { return data.size(); }

    // Returns the handle at `index`. `index` must be less than `size()`
    // (checked by `assert` only).
    Int64 operator[](size_t index) const
    {
        assert(index < data.size());
        return data[index];
    }

    // Iteration is delegated to the underlying array's own iterators.
    auto begin() const { return data.begin(); }
    auto end() const { return data.end(); }

private:
    const PaddedPODArray<Int64> & data;
};

// Specialization for String handles.
// The view is non-owning: it keeps references to the offsets and chars arrays of the
// `ColumnString` passed to the constructor and exposes each row as a `std::string_view`
// into the chars buffer.
template <>
class ColumnView<String>
{
public:
    // `col` is cast to `ColumnString` with `typeid_cast`; its offsets and chars are
    // captured by reference.
    ColumnView(const IColumn & col)
        : offsets(typeid_cast<const ColumnString &>(col).getOffsets())
        , chars(typeid_cast<const ColumnString &>(col).getChars())
    {}

    // Random-access style iterator over the rows of the column.
    // Dereferencing produces a temporary `std::string_view`, not a reference to a stored object.
    class Iterator
    {
    public:
        using iterator_category = std::random_access_iterator_tag;
        using value_type = std::string_view;
        using difference_type = std::ptrdiff_t;
        // `operator*` returns by value, so there is no real pointer/reference type.
        // The aliases are provided so `std::iterator_traits<Iterator>` is complete.
        using pointer = void;
        using reference = value_type;

        Iterator(const IColumn::Offsets & offsets, const ColumnString::Chars_t & chars, size_t pos)
            : pos(pos)
            , offsets(&offsets)
            , chars(&chars)
        {}

        // Returns the row at the current position.
        value_type operator*() const
        {
            // Row 0 reads `offsets[-1]` as its start offset, so that slot must be readable and zero.
            // NOTE(review): presumably guaranteed by the left padding of the offsets array -- confirm.
            assert((*offsets)[-1] == 0);
            const auto off = (*offsets)[pos - 1];
            // The extra `- 1` presumably drops the terminating zero byte stored after each row -- confirm.
            const auto size = (*offsets)[pos] - (*offsets)[pos - 1] - 1;
            return std::string_view(reinterpret_cast<const char *>(chars->data() + off), size);
        }

        // Returns the row `n` positions away from the current one.
        value_type operator[](difference_type n) const { return *(*this + n); }

        // Arithmetic does not modify `*this`, so these are `const` and usable on const iterators.
        Iterator operator+(difference_type n) const { return Iterator{*offsets, *chars, pos + n}; }

        Iterator operator-(difference_type n) const { return Iterator{*offsets, *chars, pos - n}; }

        // Distance in rows between two iterators; unsigned wrap-around is converted to a signed value.
        difference_type operator-(const Iterator & other) const
        {
            return static_cast<difference_type>(pos - other.pos);
        }

        Iterator & operator++()
        {
            ++pos;
            return *this;
        }

        Iterator & operator--()
        {
            --pos;
            return *this;
        }

        Iterator operator++(int)
        {
            Iterator tmp = *this;
            ++pos;
            return tmp;
        }

        Iterator operator--(int)
        {
            Iterator tmp = *this;
            --pos;
            return tmp;
        }

        Iterator & operator+=(difference_type n)
        {
            pos += n;
            return *this;
        }

        Iterator & operator-=(difference_type n)
        {
            pos -= n;
            return *this;
        }

        // Memberwise comparison in declaration order: `pos`, then `offsets`, then `chars`.
        // It does NOT compare the string contents.
        // Assume `this->offsets == other.offsets && this->chars == other.chars`,
        // so it is equivalent to `this->pos <=> other.pos`.
        auto operator<=>(const Iterator & other) const = default;

    private:
        size_t pos = 0;
        const IColumn::Offsets * offsets; // Using pointer for operator assignment
        const ColumnString::Chars_t * chars;
    };

    auto begin() const { return Iterator(offsets, chars, 0); }

    auto end() const { return Iterator(offsets, chars, offsets.size()); }

    // Returns the row at `index`. `index` must be less than `size()` (checked by `assert` only).
    std::string_view operator[](size_t index) const
    {
        assert(index < offsets.size());
        // For `index == 0` this reads the slot just before the first offset (see `Iterator::operator*`).
        const auto off = offsets[index - 1];
        const auto size = offsets[index] - offsets[index - 1] - 1;
        return std::string_view(reinterpret_cast<const char *>(chars.data() + off), size);
    }

    // Number of rows, i.e. the number of offsets.
    size_t size() const { return offsets.size(); }

private:
    const IColumn::Offsets & offsets;
    const ColumnString::Chars_t & chars;
};

} // namespace DB::DM
47 changes: 22 additions & 25 deletions dbms/src/Storages/DeltaMerge/tests/DMTestEnv.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,7 @@

#include <vector>

namespace DB
{
namespace DM
{
namespace tests
namespace DB::DM::tests
{
// Builds a `RowKeyRange` from the handle range `HandleRange(start, end)` and converts it
// to a region range for `table_id` via `toRegionRange`.
#define GET_REGION_RANGE(start, end, table_id) \
RowKeyRange::fromHandleRange(::DB::DM::HandleRange((start), (end))).toRegionRange((table_id))
Expand Down Expand Up @@ -112,6 +108,14 @@ inline String genMockCommonHandle(Int64 value, size_t rowkey_column_size)
return ss.releaseStr();
}

// Decodes an Int64 from the front of the encoded handle string `s`.
// It reads one UInt8 codec flag, requires it to equal `TiDB::CodecFlagInt`
// (enforced by `RUNTIME_CHECK`), then decodes and returns the Int64 that follows.
// NOTE(review): presumably the inverse of `genMockCommonHandle` for the first rowkey column -- confirm.
inline Int64 decodeMockCommonHandle(const String & s)
{
// Read position inside `s`; shared by both decode calls (presumably advanced by them).
size_t cursor = 0;
auto flag = ::DB::DecodeUInt<UInt8>(cursor, s);
RUNTIME_CHECK(flag == static_cast<UInt8>(TiDB::CodecFlagInt), flag);
return ::DB::DecodeInt64(cursor, s);
}

class DMTestEnv
{
public:
Expand Down Expand Up @@ -305,36 +309,32 @@ class DMTestEnv
size_t rowkey_column_size = 1,
bool with_internal_columns = true,
bool is_deleted = false,
bool with_nullable_uint64 = false)
bool with_nullable_uint64 = false,
bool including_right_boundary = false) // [beg, end) or [beg, end]
{
Block block;
const size_t num_rows = (end - beg);
const size_t num_rows = (end - beg) + including_right_boundary;
std::vector<Int64> handles(num_rows);
std::iota(handles.begin(), handles.end(), beg);
if (reversed)
std::reverse(handles.begin(), handles.end());
if (is_common_handle)
{
// common_pk_col
Strings values;
for (size_t i = 0; i < num_rows; i++)
{
Int64 value = reversed ? end - 1 - i : beg + i;
values.emplace_back(genMockCommonHandle(value, rowkey_column_size));
}
for (Int64 h : handles)
values.emplace_back(genMockCommonHandle(h, rowkey_column_size));
block.insert(DB::tests::createColumn<String>(std::move(values), pk_name_, pk_col_id));
}
else
{
// int-like pk_col
block.insert(ColumnWithTypeAndName{
DB::tests::makeColumn<Int64>(pk_type, createNumbers<Int64>(beg, end, reversed)),
pk_type,
pk_name_,
pk_col_id});
block.insert(
ColumnWithTypeAndName{DB::tests::makeColumn<Int64>(pk_type, handles), pk_type, pk_name_, pk_col_id});
// add extra column if need
if (pk_col_id != MutSup::extra_handle_id)
{
block.insert(ColumnWithTypeAndName{
DB::tests::makeColumn<Int64>(
MutSup::getExtraHandleColumnIntType(),
createNumbers<Int64>(beg, end, reversed)),
DB::tests::makeColumn<Int64>(MutSup::getExtraHandleColumnIntType(), handles),
MutSup::getExtraHandleColumnIntType(),
MutSup::extra_handle_column_name,
MutSup::extra_handle_id});
Expand Down Expand Up @@ -568,7 +568,4 @@ class DMTestEnv
return num++;
}
};

} // namespace tests
} // namespace DM
} // namespace DB
} // namespace DB::DM::tests
28 changes: 22 additions & 6 deletions dbms/src/Storages/DeltaMerge/tests/gtest_dm_vector_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,7 +1196,7 @@ class VectorIndexSegmentTestBase
ColumnDefines columns_to_read,
ANNQueryInfoPtr ann_query)
{
auto range = buildRowKeyRange(begin, end);
auto range = buildRowKeyRange(begin, end, /*is_common_handle*/ false);
auto [segment, snapshot] = getSegmentForRead(segment_id);
// load DMFilePackFilterResult for each DMFile
DMFilePackFilterResults pack_filter_results;
Expand Down Expand Up @@ -1228,9 +1228,15 @@ class VectorIndexSegmentTestBase
ColumnDefine cdPK() { return getExtraHandleColumnDefine(options.is_common_handle); }

protected:
Block prepareWriteBlockImpl(Int64 start_key, Int64 end_key, bool is_deleted) override
{
auto block = SegmentTestBasic::prepareWriteBlockImpl(start_key, end_key, is_deleted);
Block prepareWriteBlockImpl(
Int64 start_key,
Int64 end_key,
bool is_deleted,
bool including_right_boundary,
std::optional<UInt64> ts) override
{
auto block
= SegmentTestBasic::prepareWriteBlockImpl(start_key, end_key, is_deleted, including_right_boundary, ts);
block.insert(colVecFloat32(fmt::format("[{}, {})", start_key, end_key), vec_column_name, vec_column_id));
return block;
}
Expand Down Expand Up @@ -1666,9 +1672,19 @@ class VectorIndexSegmentExtraColumnTest
return ColumnDefine(extra_column_id, extra_column_name, tests::typeFromString("Int64"));
}

Block prepareWriteBlockImpl(Int64 start_key, Int64 end_key, bool is_deleted) override
Block prepareWriteBlockImpl(
Int64 start_key,
Int64 end_key,
bool is_deleted,
bool including_right_boundary,
std::optional<UInt64> ts) override
{
auto block = VectorIndexSegmentTestBase::prepareWriteBlockImpl(start_key, end_key, is_deleted);
auto block = VectorIndexSegmentTestBase::prepareWriteBlockImpl(
start_key,
end_key,
is_deleted,
including_right_boundary,
ts);
block.insert(
colInt64(fmt::format("[{}, {})", start_key + 1000, end_key + 1000), extra_column_name, extra_column_id));
return block;
Expand Down
Loading

0 comments on commit a74d83e

Please sign in to comment.