-
Notifications
You must be signed in to change notification settings - Fork 3.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[feature-wip](merge-on-write) MOW table support different primary keys and sort keys #24788
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang-tidy made some suggestions
vectorized::MutableBlock mutable_block = | ||
vectorized::MutableBlock::build_mutable_block(&in_block); | ||
|
||
std::vector<RowInBlock*> row_in_blocks; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'row_in_blocks' is not initialized [cppcoreguidelines-init-variables]
std::vector<RowInBlock*> row_in_blocks; | |
std::vector<RowInBlock*> row_in_blocks = 0; |
|
||
in_block = mutable_block.to_block(); | ||
SCOPED_RAW_TIMER(&_stat.put_into_output_ns); | ||
std::vector<int> row_pos_vec; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'row_pos_vec' is not initialized [cppcoreguidelines-init-variables]
std::vector<int> row_pos_vec; | |
std::vector<int> row_pos_vec = 0; |
@@ -350,13 +350,18 @@ Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column, | |||
Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, RowLocation* row_location) { | |||
RETURN_IF_ERROR(load_pk_index_and_bf()); | |||
bool has_seq_col = _tablet_schema->has_sequence_col(); | |||
bool has_rowid = !_tablet_schema->cluster_key_idxes().empty(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'has_rowid' is not initialized [cppcoreguidelines-init-variables]
bool has_rowid = !_tablet_schema->cluster_key_idxes().empty(); | |
bool has_rowid = false = !_tablet_schema->cluster_key_idxes().empty(); |
@@ -709,22 +738,100 @@ Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_po | |||
converted_result.second->get_data(), num_rows)); | |||
} | |||
if (_has_key) { | |||
if (_tablet_schema->keys_type() == UNIQUE_KEYS && _opts.enable_unique_key_merge_on_write) { | |||
bool need_primary_key_indexes = (_tablet_schema->keys_type() == UNIQUE_KEYS && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'need_primary_key_indexes' is not initialized [cppcoreguidelines-init-variables]
bool need_primary_key_indexes = (_tablet_schema->keys_type() == UNIQUE_KEYS && | |
bool need_primary_key_indexes = false = (_tablet_schema->keys_type() == UNIQUE_KEYS && |
if (!need_short_key_indexes) { | ||
std::string last_key; | ||
for (size_t pos = 0; pos < num_rows; pos++) { | ||
std::string key = _full_encode_keys(key_columns, pos); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'key' is not initialized [cppcoreguidelines-init-variables]
std::string key = _full_encode_keys(key_columns, pos); | |
std::string key = 0 = _full_encode_keys(key_columns, pos); |
std::vector<std::string> primary_keys; | ||
// keep primary keys in memory | ||
for (uint32_t pos = 0; pos < num_rows; pos++) { | ||
std::string key = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'key' is not initialized [cppcoreguidelines-init-variables]
std::string key = | |
std::string key = 0 = |
// sort primary keys | ||
std::sort(primary_keys.begin(), primary_keys.end()); | ||
// write primary keys | ||
std::string last_key; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'last_key' is not initialized [cppcoreguidelines-init-variables]
std::string last_key; | |
std::string last_key = 0; |
@@ -248,6 +248,7 @@ class TabletSchema { | |||
std::vector<TabletColumn>& mutable_columns(); | |||
size_t num_columns() const { return _num_columns; } | |||
size_t num_key_columns() const { return _num_key_columns; } | |||
std::vector<uint32_t> cluster_key_idxes() const { return _cluster_key_idxes; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: function 'cluster_key_idxes' should be marked [[nodiscard]] [modernize-use-nodiscard]
std::vector<uint32_t> cluster_key_idxes() const { return _cluster_key_idxes; } | |
[[nodiscard]] std::vector<uint32_t> cluster_key_idxes() const { return _cluster_key_idxes; } |
@@ -57,7 +57,7 @@ TEST_F(PrimaryKeyIndexTest, builder) { | |||
auto fs = io::global_local_filesystem(); | |||
EXPECT_TRUE(fs->create_file(filename, &file_writer).ok()); | |||
|
|||
PrimaryKeyIndexBuilder builder(file_writer.get(), 0); | |||
PrimaryKeyIndexBuilder builder(file_writer.get(), 0, 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: variable 'builder' is not initialized [cppcoreguidelines-init-variables]
PrimaryKeyIndexBuilder builder(file_writer.get(), 0, 0); | |
PrimaryKeyIndexBuilder builder = 0(file_writer.get(), 0, 0); |
run buildall |
(From new machine)TeamCity pipeline, clickbench performance test result: |
TeamCity be ut coverage result: |
run buildall |
TeamCity be ut coverage result: |
(From new machine)TeamCity pipeline, clickbench performance test result: |
114115e
to
b28cd8e
Compare
run buildall |
(From new machine)TeamCity pipeline, clickbench performance test result: |
TeamCity be ut coverage result: |
b28cd8e
to
b254d4a
Compare
run buildall |
b254d4a
to
d44818c
Compare
b68e609
to
f20bede
Compare
f20bede
to
8d367cc
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang-tidy made some suggestions
@@ -91,7 +94,7 @@ class MergeIndexDeleteBitmapCalculator { | |||
MergeIndexDeleteBitmapCalculator() = default; | |||
|
|||
Status init(RowsetId rowset_id, std::vector<SegmentSharedPtr> const& segments, | |||
size_t seq_col_length = 0, size_t max_batch_size = 1024); | |||
size_t seq_col_length = 0, size_t rowid_length = 0, size_t max_batch_size = 1024); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: 1024 is a magic number; consider replacing it with a named constant [readability-magic-numbers]
size_t seq_col_length = 0, size_t rowid_length = 0, size_t max_batch_size = 1024);
^
// used for unique-key with merge on write | ||
void _encode_seq_column(const vectorized::IOlapColumnDataAccessor* seq_column, size_t pos, | ||
string* encoded_keys); | ||
void _encode_rowid(const uint32_t rowid, string* encoded_keys); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: parameter 'rowid' is const-qualified in the function declaration; const-qualification of parameters only has an effect in function definitions [readability-avoid-const-params-in-decls]
void _encode_rowid(const uint32_t rowid, string* encoded_keys); | |
void _encode_rowid(uint32_t rowid, string* encoded_keys); |
293685a
to
b1a84a5
Compare
b1a84a5
to
4967df3
Compare
run buildall |
(From new machine)TeamCity pipeline, clickbench performance test result: |
4967df3
to
7684b08
Compare
run buildall |
TeamCity be ut coverage result: |
(From new machine)TeamCity pipeline, clickbench performance test result: |
run buildall |
378c20a
eef98b3
to
378c20a
Compare
run buildall |
TPC-H test result on machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
|
(From new machine)TeamCity pipeline, clickbench performance test result: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
PR approved by at least one committer and no changes requested. |
Proposed changes
MOW tables support define different primary keys and sort keys like this:
Further comments
If this is a relatively large or complex change, kick off the discussion at [email protected] by explaining why you chose the solution you did and what alternatives you considered, etc...