From 5fb7847aaf1781fcb9e9d8fb6759871847e86ed5 Mon Sep 17 00:00:00 2001 From: litongxin Date: Sun, 3 Apr 2022 14:48:45 +0000 Subject: [PATCH 01/23] add test for disktable gc --- src/storage/table_test.cc | 231 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index f00b40dabb4..1f8835576c4 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -1927,6 +1927,237 @@ TEST_P(TableTest, AbsAndLat) { delete table; } +TEST_P(TableTest, AbsAndLatGC) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsAndLat, 100, 10); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsAndLat, 50, 8); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "test", "ts3", ::openmldb::type::kAbsAndLat, 70, 5); + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test" + std::to_string(i), + "testnew" + std::to_string(i), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 1) { + ASSERT_EQ(80, count); + } else if (i == 2) { + ASSERT_EQ(70, count); + } else { + ASSERT_EQ(100, count); + } + } + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + table->SchedGc(); + EXPECT_EQ(180, (int64_t)table->GetRecordIdxCnt()); + + delete table; +} + +TEST_P(TableTest, AbsAndLatGC1) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 10); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 8); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "test", "ts3", ::openmldb::type::kLatestTime, 0, 5); + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test", + "testnew", + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 1) { + ASSERT_EQ(80, count); + } else if (i == 2) { + ASSERT_EQ(70, count); + } else { + ASSERT_EQ(100, count); + } + } + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + table->SchedGc(); + EXPECT_EQ(180, (int64_t)table->GetRecordIdxCnt()); + + delete table; +} + +TEST_P(TableTest, AbsAndLatGC2) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 70, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i % 10), + "testnew"+ std::to_string(i % 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 1; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + ASSERT_EQ(70, count); + } else if (i == 1) { + ASSERT_EQ(50, count); + } + } + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + table->SchedGc(); + EXPECT_EQ(180, (int64_t)table->GetRecordIdxCnt()); + + delete table; +} + INSTANTIATE_TEST_CASE_P(TestMemAndHDD, TableTest, ::testing::Values(::openmldb::common::kMemory, ::openmldb::common::kHDD)); From 83da99f9f50e06264422ebcd8aab0ce23da803e9 Mon Sep 17 00:00:00 2001 From: litongxin Date: Sun, 17 Apr 2022 11:14:58 +0000 Subject: [PATCH 02/23] add more test for disktable gc --- src/storage/table_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 1f8835576c4..0df18220259 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -1963,8 +1963,8 @@ TEST_P(TableTest, AbsAndLatGC) { uint64_t ts = now - (99 - i) * 60 * 1000; std::string ts_str = std::to_string(ts); - std::vector row = {"test" + std::to_string(i), - "testnew" + std::to_string(i), + std::vector row = {"test" + std::to_string(i % 10), + "testnew" + std::to_string(i % 10), ts_str, ts_str, ts_str}; @@ -2041,8 +2041,8 @@ TEST_P(TableTest, AbsAndLatGC1) { uint64_t ts = now - (99 - i) * 60 * 1000; std::string ts_str = std::to_string(ts); - std::vector row = {"test", - "testnew", + std::vector row = {"test" + std::to_string(i % 10), + "testnew" + std::to_string(i % 10), ts_str, ts_str, ts_str}; @@ -2070,7 +2070,7 @@ TEST_P(TableTest, AbsAndLatGC1) { if (i == 1) { ASSERT_EQ(80, count); } else if (i == 2) { - ASSERT_EQ(70, count); + ASSERT_EQ(50, count); } else { ASSERT_EQ(100, count); } @@ -2153,7 +2153,7 @@ TEST_P(TableTest, AbsAndLatGC2) { EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); table->SchedGc(); - EXPECT_EQ(180, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(122, (int64_t)table->GetRecordIdxCnt()); delete table; } From ab06806717814f49aeb9d9d8e25c173b92a9df21 Mon Sep 17 00:00:00 2001 From: litongxin Date: Wed, 27 Apr 2022 16:53:15 +0000 Subject: [PATCH 03/23] fix wreorder --- src/storage/disk_table.cc | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 451deff6769..abf98b67912 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -987,12 +987,12 @@ DiskTableRowIterator::DiskTableRowIterator(rocksdb::DB* db, rocksdb::Iterator* i snapshot_(snapshot), record_idx_(1), expire_value_(expire_time, expire_cnt, ttl_type), - row_(), pk_(pk), row_pk_(pk), - ts_(ts), has_ts_idx_(has_ts_idx), - ts_idx_(ts_idx) {} + ts_(ts), + ts_idx_(ts_idx), + row_() {} DiskTableRowIterator::~DiskTableRowIterator() { delete it_; @@ -1097,6 +1097,27 @@ bool DiskTable::DeleteIndex(const std::string& idx_name) { } uint64_t DiskTable::GetRecordIdxCnt() { + auto inner_indexs = table_index_.GetAllInnerIndex(); + rocksdb::ReadOptions ro = rocksdb::ReadOptions(); + const rocksdb::Snapshot* snapshot = db_->GetSnapshot(); + + // for (size_t i = 0; i < inner_indexs->size(); i++) { + // bool is_valid = false; + // for (const auto& index_def : inner_indexs->at(i)->GetIndex()) { + // if (index_def && index_def->IsReady()) { + // rocksdb::Iterator* it = db_->NewIterator(ro, cf_hs_[i + 1]); + + // for (; it->Valid(); it->Next()) { + // uint32_t cur_ts_idx = UINT32_MAX; + // std::string cur_pk; + // uint64_t cur_ts; + + // ParseKeyAndTs(has_ts_idx, it->key(), cur_pk, cur_ts, cur_ts_idx); + // } + // } + // } + + // } // TODO(litongxin) return 0; } From aa8bb52d8beea13e87974e130e5b368ef0589d59 Mon Sep 17 00:00:00 2001 From: litongxin Date: Thu, 28 Apr 2022 17:36:16 +0000 Subject: [PATCH 04/23] finish latest idxcnt --- src/storage/disk_table.cc | 66 +++- src/storage/disk_table.h | 2 + src/storage/table_test.cc | 690 +++++++++++++++++++++++++++++++++++++- 3 files changed, 749 insertions(+), 9 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index abf98b67912..b0ac0e7b18c 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -171,6 +171,10 @@ bool DiskTable::InitColumnFamilyDescriptor() { cf_ds_.push_back(rocksdb::ColumnFamilyDescriptor(index_def->GetName(), cfo)); DEBUGLOG("add cf_name %s. tid %u pid %u", index_def->GetName().c_str(), id_, pid_); } + auto indexs = table_index_.GetAllIndex(); + for (uint32_t i = 0; i < indexs.size(); i++) { + idx_cnt_vec_.push_back(std::make_shared>(0)); + } return true; } @@ -208,6 +212,7 @@ bool DiskTable::Put(const std::string& pk, uint64_t time, const char* data, uint s = db_->Put(write_opts_, cf_hs_[1], spk, rocksdb::Slice(data, size)); if (s.ok()) { offset_.fetch_add(1, std::memory_order_relaxed); + idx_cnt_vec_[0]->fetch_add(1, std::memory_order_relaxed); return true; } else { DEBUGLOG("Put failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); @@ -258,6 +263,9 @@ bool DiskTable::Put(uint64_t time, const std::string& value, const Dimensions& d } s = db_->Write(write_opts_, &batch); if (s.ok()) { + for (Dimensions::const_iterator it = dimensions.begin(); it != dimensions.end(); ++it) { + idx_cnt_vec_[it->idx()]->fetch_add(1, std::memory_order_relaxed); + } offset_.fetch_add(1, std::memory_order_relaxed); return true; } else { @@ -336,7 +344,9 @@ void DiskTable::GcHead() { if (indexs.size() > 1) { bool need_ttl = false; std::map ttl_map; + std::map idx_map; for (const auto& index : indexs) { + idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); auto ts_col = index->GetTsColumn(); if (ts_col) { auto lat_ttl = index->GetTTL()->lat_ttl; @@ -344,6 +354,7 @@ void DiskTable::GcHead() { ttl_map.emplace(ts_col->GetId(), lat_ttl); need_ttl = true; } + idx_map.emplace(ts_col->GetId(), index->GetId()); } } if (!need_ttl) { @@ -358,14 +369,14 @@ void DiskTable::GcHead() { uint32_t ts_idx = 0; ParseKeyAndTs(true, it->key(), cur_pk, ts, ts_idx); if (!last_pk.empty() && cur_pk == last_pk) { + auto key_cnt_iter = key_cnt.find(ts_idx); + if (key_cnt_iter == key_cnt.end()) { + key_cnt.insert(std::make_pair(ts_idx, 1)); + } else { + key_cnt_iter->second++; + } auto ttl_iter = ttl_map.find(ts_idx); if (ttl_iter != ttl_map.end() && ttl_iter->second > 0) { - auto key_cnt_iter = key_cnt.find(ts_idx); - if (key_cnt_iter == key_cnt.end()) { - key_cnt.insert(std::make_pair(ts_idx, 1)); - } else { - key_cnt_iter->second++; - } if (key_cnt_iter->second > ttl_iter->second && delete_key_map.find(ts_idx) == delete_key_map.end()) { delete_key_map.insert(std::make_pair(ts_idx, ts)); @@ -381,6 +392,15 @@ void DiskTable::GcHead() { PDLOG(WARNING, "Delete failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); } } + for (auto ts_idx_iter = key_cnt.begin(); ts_idx_iter != key_cnt.end(); ts_idx_iter++) { + auto index_iterator = idx_map.find(ts_idx_iter->first); + auto ttl_iter = ttl_map.find(ts_idx_iter->first); + if (ttl_iter != ttl_map.end() && ttl_iter->second > 0 && ts_idx_iter->second <= ttl_iter->second) { + idx_cnt_vec_[index_iterator->second]->fetch_add(ts_idx_iter->second, std::memory_order_relaxed); + } else { + idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, std::memory_order_relaxed); + } + } delete_key_map.clear(); key_cnt.clear(); key_cnt.insert(std::make_pair(ts_idx, 1)); @@ -397,8 +417,28 @@ void DiskTable::GcHead() { PDLOG(WARNING, "Delete failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); } } + for (const auto& kv : delete_key_map) { + std::string combine_key1 = CombineKeyTs(last_pk, kv.second, kv.first); + std::string combine_key2 = CombineKeyTs(last_pk, 0, kv.first); + rocksdb::Status s = db_->DeleteRange(write_opts_, cf_hs_[idx + 1], rocksdb::Slice(combine_key1), + rocksdb::Slice(combine_key2)); + if (!s.ok()) { + PDLOG(WARNING, "Delete failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); + } + } + for (auto ts_idx_iter = key_cnt.begin(); ts_idx_iter != key_cnt.end(); ts_idx_iter++) { + auto index_iterator = idx_map.find(ts_idx_iter->first); + auto ttl_iter = ttl_map.find(ts_idx_iter->first); + if (ttl_iter != ttl_map.end() && ttl_iter->second > 0 && ts_idx_iter->second <= ttl_iter->second) { + idx_cnt_vec_[index_iterator->second]->fetch_add(ts_idx_iter->second, std::memory_order_relaxed); + } else { + idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, std::memory_order_relaxed); + } + } } else { auto index = indexs.front(); + uint32_t index_id = index->GetId(); + idx_cnt_vec_[index_id]->store(0, std::memory_order_relaxed); auto ttl_num = index->GetTTL()->lat_ttl; if (ttl_num < 1) { continue; @@ -425,11 +465,13 @@ void DiskTable::GcHead() { it->Seek(rocksdb::Slice(combine_key2)); } } else { + idx_cnt_vec_[index_id]->fetch_add(count); count = 1; last_pk = cur_pk; it->Next(); } } + idx_cnt_vec_[index_id]->fetch_add(count); } delete it; db_->ReleaseSnapshot(snapshot); @@ -1123,7 +1165,17 @@ uint64_t DiskTable::GetRecordIdxCnt() { } bool DiskTable::GetRecordIdxCnt(uint32_t idx, uint64_t** stat, uint32_t* size) { - // TODO(litongxin) + if (stat == NULL) { + return false; + } + std::shared_ptr index_def = table_index_.GetIndex(idx); + if (!index_def || !index_def->IsReady()) { + return false; + } + auto* data_array = new uint64_t[1]; + data_array[0] = idx_cnt_vec_[idx]->load(std::memory_order_relaxed); + *stat = data_array; + *size = 1; return true; } diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 190cdf2040c..8ba30a2f64f 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -434,6 +434,8 @@ class DiskTable : public Table { KeyTSComparator cmp_; std::atomic offset_; std::string table_path_; + std::atomic pk_cnt_; + std::vector>> idx_cnt_vec_; }; } // namespace storage diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 0df18220259..3f168ae0847 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2118,8 +2118,8 @@ TEST_P(TableTest, AbsAndLatGC2) { uint64_t ts = now - (99 - i) * 60 * 1000; std::string ts_str = std::to_string(ts); - std::vector row = {"test"+ std::to_string(i % 10), - "testnew"+ std::to_string(i % 10), + std::vector row = {"test"+ std::to_string(i), + "testnew"+ std::to_string(i), ts_str, ts_str, ts_str}; @@ -2152,8 +2152,694 @@ TEST_P(TableTest, AbsAndLatGC2) { } EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); + + table->SchedGc(); table->SchedGc(); EXPECT_EQ(122, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 100, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 70, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 60, 0); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i % 10), + "testnew"+ std::to_string(i % 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + ASSERT_EQ(50, count); + } else if (i == 1) { + ASSERT_EQ(20, count); + } else if (i == 2) { + ASSERT_EQ(10, count); + } + } + + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + + table->SchedGc(); + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut2) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 8); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 6); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 5); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i % 10), + "testnew"+ std::to_string(i % 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(80, count); + } else if (i == 1) { + EXPECT_EQ(50, count); + } else if (i == 2) { + EXPECT_EQ(60, count); + } + } + + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + + table->SchedGc(); + EXPECT_EQ(130, (int64_t)table->GetRecordIdxCnt()); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut3) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 8); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 6); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 5); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i % 10), + "testnew"+ std::to_string(i % 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(80, count); + } else if (i == 1) { + EXPECT_EQ(60, count); + } else if (i == 2) { + EXPECT_EQ(50, count); + } + } + + + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + + table->SchedGc(); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut4) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 60, 0); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i), + "testnew"+ std::to_string(i), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(90, count); + } else if (i == 1) { + EXPECT_EQ(50, count); + } else if (i == 2) { + EXPECT_EQ(60, count); + } + } + + + EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); + + table->SchedGc(); + table->SchedGc(); + table->SchedGc(); + table->SchedGc(); + EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut5) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts1", ::openmldb::type::kAbsoluteTime, 50, 0); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i), + "testnew"+ std::to_string(i), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 1; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(90, count); + } else if (i == 1) { + EXPECT_EQ(50, count); + } else if (i == 2) { + EXPECT_EQ(60, count); + } + } + + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + + table->SchedGc(); + EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); + + uint64_t* stats = NULL; + uint32_t size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); + int ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(91, ts_count); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut6) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 40, 0); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i), + "testnew"+ std::to_string(i), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + ::openmldb::api::Dimension* dim2 = request.add_dimensions(); + dim2->set_idx(2); + dim2->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(90, count); + } else if (i == 1) { + EXPECT_EQ(50, count); + } else if (i == 2) { + EXPECT_EQ(40, count); + } + } + + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + + table->SchedGc(); + EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); + + uint64_t* stats = NULL; + uint32_t size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); + int ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(51, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(41, ts_count); + + delete table; +} + +TEST_P(TableTest, TsIdxCntPut7) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 7); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 5); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 4); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i % 10), + "testnew"+ std::to_string(i % 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + ::openmldb::api::Dimension* dim2 = request.add_dimensions(); + dim2->set_idx(2); + dim2->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(70, count); + } else if (i == 1) { + EXPECT_EQ(50, count); + } else if (i == 2) { + EXPECT_EQ(40, count); + } + } + + + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + + table->SchedGc(); + EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); + + uint64_t* stats = NULL; + uint32_t size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); + int ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(50, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(40, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(70, ts_count); + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"testk"+ std::to_string(i % 10), + "testknew"+ std::to_string(i % 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + ::openmldb::api::Dimension* dim2 = request.add_dimensions(); + dim2->set_idx(2); + dim2->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(150, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(140, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(170, ts_count); delete table; } From c132afa3fe0a7e2dd1a55d2898342bae5fb8c367 Mon Sep 17 00:00:00 2001 From: litongxin Date: Mon, 2 May 2022 10:25:13 +0000 Subject: [PATCH 05/23] add new compaction filter and factory --- src/storage/disk_table.cc | 2 +- src/storage/disk_table.h | 70 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index b0ac0e7b18c..892776f0242 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -166,7 +166,7 @@ bool DiskTable::InitColumnFamilyDescriptor() { auto index_def = indexs.front(); if (index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsoluteTime || index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsOrLat) { - cfo.compaction_filter_factory = std::make_shared(inner_index); + cfo.compaction_filter_factory = std::make_shared(inner_index, &idx_cnt_vec_); } cf_ds_.push_back(rocksdb::ColumnFamilyDescriptor(index_def->GetName(), cfo)); DEBUGLOG("add cf_name %s. tid %u pid %u", index_def->GetName().c_str(), id_, pid_); diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 8ba30a2f64f..898e18370b9 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -198,17 +198,85 @@ class AbsoluteTTLCompactionFilter : public rocksdb::CompactionFilter { std::shared_ptr inner_index_; }; +class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { + public: + explicit AbsoluteTTLAndCountCompactionFilter(std::shared_ptr inner_index, + std::vector>>* idx_cnt_vec) + : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec) {} + virtual ~AbsoluteTTLAndCountCompactionFilter() {} + + const char* Name() const override { return "AbsoluteTTLAndCountCompactionFilter"; } + + bool Filter(int /*level*/, const rocksdb::Slice& key, const rocksdb::Slice& /*existing_value*/, + std::string* /*new_value*/, bool* /*value_changed*/) const override { + if (key.size() < TS_LEN) { + return false; + } + uint64_t real_ttl = 0; + const auto& indexs = inner_index_->GetIndex(); + uint32_t idx; + if (indexs.size() > 1) { + if (key.size() < TS_LEN + TS_POS_LEN) { + return false; + } + uint32_t ts_idx = *((uint32_t*)(key.data() + key.size() - TS_LEN - // NOLINT + TS_POS_LEN)); + bool has_found = false; + for (const auto index : indexs) { + auto ts_col = index->GetTsColumn(); + if (!ts_col) { + return false; + } + if (ts_col->GetId() == ts_idx) { + real_ttl = index->GetTTL()->abs_ttl; + idx = index->GetId(); + has_found = true; + break; + } + } + if (!has_found) { + return false; + } + } else { + real_ttl = indexs.front()->GetTTL()->abs_ttl; + idx = indexs.front()->GetId(); + } + if (real_ttl < 1) { + return false; + } + uint64_t ts = 0; + memcpy(static_cast(&ts), key.data() + key.size() - TS_LEN, TS_LEN); + memrev64ifbe(static_cast(&ts)); + uint64_t cur_time = ::baidu::common::timer::get_micros() / 1000; + if (ts < cur_time - real_ttl) { + idx_cnt_vec_->at(idx)->fetch_add(1, std::memory_order_relaxed); + return true; + } + return false; + } + + private: + std::shared_ptr inner_index_; + std::vector>>* idx_cnt_vec_; +}; + class AbsoluteTTLFilterFactory : public rocksdb::CompactionFilterFactory { public: - explicit AbsoluteTTLFilterFactory(const std::shared_ptr& inner_index) : inner_index_(inner_index) {} + explicit AbsoluteTTLFilterFactory(const std::shared_ptr& inner_index, + std::vector>>* idx_cnt_vec) + : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec) {} std::unique_ptr CreateCompactionFilter( const rocksdb::CompactionFilter::Context& context) override { + if (context.is_manual_compaction) { + return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); + } return std::unique_ptr(new AbsoluteTTLCompactionFilter(inner_index_)); } const char* Name() const override { return "AbsoluteTTLFilterFactory"; } private: std::shared_ptr inner_index_; + std::vector>>* idx_cnt_vec_; }; class DiskTableIterator : public TableIterator { From 3545a0e253eba2573652914253be2e435bb96b0a Mon Sep 17 00:00:00 2001 From: litongxin Date: Fri, 6 May 2022 13:56:28 +0000 Subject: [PATCH 06/23] add bloomfilter --- src/storage/disk_table.cc | 71 ++++++++++++++++++++- src/storage/disk_table.h | 34 ++++++++-- src/storage/table_test.cc | 129 ++++++++++++++++++++++---------------- 3 files changed, 174 insertions(+), 60 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 892776f0242..2136ea3a999 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -152,6 +152,8 @@ bool DiskTable::InitColumnFamilyDescriptor() { rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions())); auto inner_indexs = table_index_.GetAllInnerIndex(); for (const auto& inner_index : *inner_indexs) { + pk_cnt_vec_.push_back(std::make_shared>(0)); + bloom_filter_vec_.push_back(BloomFilter(1000, 100)); rocksdb::ColumnFamilyOptions cfo; if (storage_mode_ == ::openmldb::common::StorageMode::kSSD) { cfo = rocksdb::ColumnFamilyOptions(ssd_option_template); @@ -167,6 +169,7 @@ bool DiskTable::InitColumnFamilyDescriptor() { if (index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsoluteTime || index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsOrLat) { cfo.compaction_filter_factory = std::make_shared(inner_index, &idx_cnt_vec_); + PDLOG(ERROR, "init compaction filter factory"); } cf_ds_.push_back(rocksdb::ColumnFamilyDescriptor(index_def->GetName(), cfo)); DEBUGLOG("add cf_name %s. tid %u pid %u", index_def->GetName().c_str(), id_, pid_); @@ -213,6 +216,10 @@ bool DiskTable::Put(const std::string& pk, uint64_t time, const char* data, uint if (s.ok()) { offset_.fetch_add(1, std::memory_order_relaxed); idx_cnt_vec_[0]->fetch_add(1, std::memory_order_relaxed); + if (!bloom_filter_vec_[0].Valid(pk.c_str())) { + bloom_filter_vec_[0].Set(pk.c_str()); + pk_cnt_vec_[0]->fetch_add(1, std::memory_order_relaxed); + } return true; } else { DEBUGLOG("Put failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); @@ -265,6 +272,11 @@ bool DiskTable::Put(uint64_t time, const std::string& value, const Dimensions& d if (s.ok()) { for (Dimensions::const_iterator it = dimensions.begin(); it != dimensions.end(); ++it) { idx_cnt_vec_[it->idx()]->fetch_add(1, std::memory_order_relaxed); + int32_t inner_pos = table_index_.GetInnerIndexPos(it->idx()); + if (!bloom_filter_vec_[inner_pos].Valid(it->key().c_str())) { + bloom_filter_vec_[inner_pos].Set(it->key().c_str()); + pk_cnt_vec_[inner_pos]->fetch_add(1, std::memory_order_relaxed); + } } offset_.fetch_add(1, std::memory_order_relaxed); return true; @@ -324,6 +336,7 @@ bool DiskTable::Get(uint32_t idx, const std::string& pk, uint64_t ts, std::strin bool DiskTable::Get(const std::string& pk, uint64_t ts, std::string& value) { return Get(0, pk, ts, value); } void DiskTable::SchedGc() { + // GcTTL(); GcHead(); UpdateTTL(); } @@ -346,7 +359,6 @@ void DiskTable::GcHead() { std::map ttl_map; std::map idx_map; for (const auto& index : indexs) { - idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); auto ts_col = index->GetTsColumn(); if (ts_col) { auto lat_ttl = index->GetTTL()->lat_ttl; @@ -480,6 +492,18 @@ void DiskTable::GcHead() { PDLOG(INFO, "Gc used %lu second. tid %u pid %u", time_used / 1000, id_, pid_); } +void DiskTable::GcTTL() { + auto indexs = table_index_.GetAllIndex(); + for (const auto& index : indexs) { + idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); + } + auto s = db_->CompactRange(rocksdb::CompactRangeOptions(), nullptr, nullptr); + if (!s.ok()) { + PDLOG(WARNING, "Manual Compaction failed"); + } + PDLOG(ERROR, "Manual Compaction Finished"); +} + void DiskTable::GcTTLOrHead() {} void DiskTable::GcTTLAndHead() {} @@ -1180,8 +1204,12 @@ bool DiskTable::GetRecordIdxCnt(uint32_t idx, uint64_t** stat, uint32_t* size) { } uint64_t DiskTable::GetRecordPkCnt() { - // TODO(litongxin) - return 0; + auto inner_indexs = table_index_.GetAllInnerIndex(); + uint64_t count = 0; + for (uint32_t i = 0; i < inner_indexs->size(); i++) { + count += pk_cnt_vec_[i]->load(std::memory_order_relaxed); + } + return count; } uint64_t DiskTable::GetRecordIdxByteSize() { @@ -1241,5 +1269,42 @@ int DiskTable::GetCount(uint32_t index, const std::string& pk, uint64_t& count) return 0; } +uint32_t BloomFilter::Hash(const char *str, uint32_t seed) +{ + // unsigned int b = 378551; + uint a = 63689; + uint hash = 0; + + while (*str) + { + hash = hash * a + (*str++); + a *= seed; + } + + return (hash & 0x7FFFFFFF); +} + +void BloomFilter::Set(const char *str) +{ + for (int i = 0; i < k_; ++i) + { + uint32_t p = Hash(str, base_[i]) % 1000000; + bit_[p] = 1; + } + +} + +bool BloomFilter::Valid(const char *str) +{ + for (int i = 0; i < k_; ++i) + { + uint32_t p = Hash(str, base_[i]) % 1000000; + if (!bit_[p]) { + return false; + } + } + return true; +} + } // namespace storage } // namespace openmldb diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 898e18370b9..f23c0820a56 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "base/endianconv.h" #include "base/slice.h" #include "boost/lexical_cast.hpp" @@ -39,6 +40,8 @@ #include "rocksdb/utilities/checkpoint.h" #include "storage/iterator.h" #include "storage/table.h" +#include "base/glog_wapper.h" // NOLINT + namespace openmldb { namespace storage { @@ -126,6 +129,22 @@ class KeyTSComparator : public rocksdb::Comparator { void FindShortSuccessor(std::string* /*key*/) const override {} }; +class BloomFilter { + public: + BloomFilter(uint32_t bitset_size, uint32_t string_cnt) : bitset_size_(bitset_size), string_cnt_(string_cnt) { k_ = ceil((bitset_size / string_cnt) * std::log(2)); } + virtual ~BloomFilter() {} + + uint32_t Hash(const char* str, uint32_t seed); + void Set(const char* str); + bool Valid(const char* str); + + private: + uint32_t k_, bitset_size_, + string_cnt_; // k:number of the hash functions //bitset_size:the size of bitset //string_cnt:number of strings to hash (k = [bitset_size/string_cnt]*ln2) + std::bitset<1<<20> bit_; + uint32_t base_[100] = {5, 7, 11, 13, 31, 37, 61}; +}; + class KeyTsPrefixTransform : public rocksdb::SliceTransform { public: const char* Name() const override { return "KeyTsPrefixTransform"; } @@ -152,6 +171,7 @@ class AbsoluteTTLCompactionFilter : public rocksdb::CompactionFilter { bool Filter(int /*level*/, const rocksdb::Slice& key, const rocksdb::Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { + PDLOG(ERROR, "using compaction filter"); if (key.size() < TS_LEN) { return false; } @@ -209,6 +229,8 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { bool Filter(int /*level*/, const rocksdb::Slice& key, const rocksdb::Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { + PDLOG(ERROR, "using compaction filter"); + idx_cnt_vec_->at(1)->fetch_add(1, std::memory_order_relaxed); if (key.size() < TS_LEN) { return false; } @@ -267,10 +289,11 @@ class AbsoluteTTLFilterFactory : public rocksdb::CompactionFilterFactory { : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec) {} std::unique_ptr CreateCompactionFilter( const rocksdb::CompactionFilter::Context& context) override { - if (context.is_manual_compaction) { - return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); - } - return std::unique_ptr(new AbsoluteTTLCompactionFilter(inner_index_)); + // if (context.is_manual_compaction) { + // return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); + // } + // return std::unique_ptr(new AbsoluteTTLCompactionFilter(inner_index_)); + return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); } const char* Name() const override { return "AbsoluteTTLFilterFactory"; } @@ -471,6 +494,7 @@ class DiskTable : public Table { void SchedGc() override; void GcHead(); + void GcTTL(); void GcTTLAndHead(); void GcTTLOrHead(); @@ -504,6 +528,8 @@ class DiskTable : public Table { std::string table_path_; std::atomic pk_cnt_; std::vector>> idx_cnt_vec_; + std::vector>> pk_cnt_vec_; + std::vector bloom_filter_vec_; }; } // namespace storage diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 3f168ae0847..825e69a3631 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2657,9 +2657,6 @@ TEST_P(TableTest, TsIdxCntPut6) { EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - - table->SchedGc(); - EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); uint64_t* stats = NULL; uint32_t size = 0; @@ -2668,6 +2665,30 @@ TEST_P(TableTest, TsIdxCntPut6) { for (int i = 0; i < size; i++) { ts_count += stats[i]; } + EXPECT_EQ(100, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(100, ts_count); + + table->SchedGc(); + table->SchedGc(); + table->SchedGc(); + + EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } EXPECT_EQ(51, ts_count); stats = NULL; @@ -2758,9 +2779,11 @@ TEST_P(TableTest, TsIdxCntPut7) { } + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); table->SchedGc(); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); uint64_t* stats = NULL; @@ -2790,56 +2813,56 @@ TEST_P(TableTest, TsIdxCntPut7) { } EXPECT_EQ(70, ts_count); - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"testk"+ std::to_string(i % 10), - "testknew"+ std::to_string(i % 10), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - ::openmldb::api::Dimension* dim2 = request.add_dimensions(); - dim2->set_idx(2); - dim2->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - stats = NULL; - size = 0; - ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); - ts_count = 0; - for (int i = 0; i < size; i++) { - ts_count += stats[i]; - } - EXPECT_EQ(150, ts_count); - - stats = NULL; - size = 0; - ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); - ts_count = 0; - for (int i = 0; i < size; i++) { - ts_count += stats[i]; - } - EXPECT_EQ(140, ts_count); - - stats = NULL; - size = 0; - ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); - ts_count = 0; - for (int i = 0; i < size; i++) { - ts_count += stats[i]; - } - EXPECT_EQ(170, ts_count); + // for (int i = 0; i < 100; i++) { + // uint64_t ts = now - (99 - i) * 60 * 1000; + // std::string ts_str = std::to_string(ts); + + // std::vector row = {"testk"+ std::to_string(i % 10), + // "testknew"+ std::to_string(i % 10), + // ts_str, + // ts_str, + // ts_str}; + // ::openmldb::api::PutRequest request; + // ::openmldb::api::Dimension* dim = request.add_dimensions(); + // dim->set_idx(0); + // dim->set_key(row[0]); + // ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + // dim1->set_idx(1); + // dim1->set_key(row[1]); + // ::openmldb::api::Dimension* dim2 = request.add_dimensions(); + // dim2->set_idx(2); + // dim2->set_key(row[1]); + // std::string value; + // ASSERT_EQ(0, codec.EncodeRow(row, &value)); + // table->Put(0, value, request.dimensions()); + // } + + // stats = NULL; + // size = 0; + // ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); + // ts_count = 0; + // for (int i = 0; i < size; i++) { + // ts_count += stats[i]; + // } + // EXPECT_EQ(150, ts_count); + + // stats = NULL; + // size = 0; + // ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); + // ts_count = 0; + // for (int i = 0; i < size; i++) { + // ts_count += stats[i]; + // } + // EXPECT_EQ(140, ts_count); + + // stats = NULL; + // size = 0; + // ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); + // ts_count = 0; + // for (int i = 0; i < size; i++) { + // ts_count += stats[i]; + // } + // EXPECT_EQ(170, ts_count); delete table; } From 0a10b41721221225c5673304b191c5686cb492ee Mon Sep 17 00:00:00 2001 From: litongxin Date: Sun, 8 May 2022 09:40:38 +0000 Subject: [PATCH 07/23] adding bloomFilter in compaction filter --- src/storage/disk_table.cc | 37 ++++++++++++++++++++++++------------- src/storage/disk_table.h | 34 ++++++++++++++++++++++++---------- src/storage/table_test.cc | 2 -- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 2136ea3a999..f18b6113f13 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -165,12 +165,16 @@ bool DiskTable::InitColumnFamilyDescriptor() { cfo.comparator = &cmp_; cfo.prefix_extractor.reset(new KeyTsPrefixTransform()); const auto& indexs = inner_index->GetIndex(); - auto index_def = indexs.front(); - if (index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsoluteTime || - index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsOrLat) { - cfo.compaction_filter_factory = std::make_shared(inner_index, &idx_cnt_vec_); - PDLOG(ERROR, "init compaction filter factory"); + for (const auto& index_def : indexs) { + if (index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsoluteTime || + index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsOrLat) { + cfo.compaction_filter_factory = std::make_shared( + inner_index, &idx_cnt_vec_, pk_cnt_vec_[inner_index->GetId()], + &bloom_filter_vec_[inner_index->GetId()]); + break; + } } + auto index_def = indexs.front(); cf_ds_.push_back(rocksdb::ColumnFamilyDescriptor(index_def->GetName(), cfo)); DEBUGLOG("add cf_name %s. tid %u pid %u", index_def->GetName().c_str(), id_, pid_); } @@ -336,11 +340,19 @@ bool DiskTable::Get(uint32_t idx, const std::string& pk, uint64_t ts, std::strin bool DiskTable::Get(const std::string& pk, uint64_t ts, std::string& value) { return Get(0, pk, ts, value); } void DiskTable::SchedGc() { - // GcTTL(); + ClearRecord(); GcHead(); + GcTTL(); UpdateTTL(); } +void DiskTable::ClearRecord() { + auto indexs = table_index_.GetAllIndex(); + for (const auto& index : indexs) { + idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); + } +} + void DiskTable::GcHead() { uint64_t start_time = ::baidu::common::timer::get_micros() / 1000; auto inner_indexs = table_index_.GetAllInnerIndex(); @@ -493,13 +505,12 @@ void DiskTable::GcHead() { } void DiskTable::GcTTL() { - auto indexs = table_index_.GetAllIndex(); - for (const auto& index : indexs) { - idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); - } - auto s = db_->CompactRange(rocksdb::CompactRangeOptions(), nullptr, nullptr); - if (!s.ok()) { - PDLOG(WARNING, "Manual Compaction failed"); + auto inner_indexs = table_index_.GetAllInnerIndex(); + for (int i = 0; i < inner_indexs->size(); i++) { + auto s = db_->CompactRange(rocksdb::CompactRangeOptions(), cf_hs_[i + 1], nullptr, nullptr); + if (!s.ok()) { + PDLOG(WARNING, "Manual Compaction failed"); + } } PDLOG(ERROR, "Manual Compaction Finished"); } diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index f23c0820a56..5e765635791 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -221,16 +221,19 @@ class AbsoluteTTLCompactionFilter : public rocksdb::CompactionFilter { class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { public: explicit AbsoluteTTLAndCountCompactionFilter(std::shared_ptr inner_index, - std::vector>>* idx_cnt_vec) - : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec) {} + std::vector>>* idx_cnt_vec, + std::shared_ptr> pk_cnt, + BloomFilter* bloom_filter) + : inner_index_(inner_index), + idx_cnt_vec_(idx_cnt_vec), + pk_cnt_(pk_cnt), + bloom_filter_(bloom_filter) {} virtual ~AbsoluteTTLAndCountCompactionFilter() {} const char* Name() const override { return "AbsoluteTTLAndCountCompactionFilter"; } bool Filter(int /*level*/, const rocksdb::Slice& key, const rocksdb::Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { - PDLOG(ERROR, "using compaction filter"); - idx_cnt_vec_->at(1)->fetch_add(1, std::memory_order_relaxed); if (key.size() < TS_LEN) { return false; } @@ -271,15 +274,22 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { memrev64ifbe(static_cast(&ts)); uint64_t cur_time = ::baidu::common::timer::get_micros() / 1000; if (ts < cur_time - real_ttl) { - idx_cnt_vec_->at(idx)->fetch_add(1, std::memory_order_relaxed); return true; } + idx_cnt_vec_->at(idx)->fetch_add(1, std::memory_order_relaxed); + uint32_t inner_pos = inner_index_->GetId(); + if (!bloom_filter_vec_[inner_pos].Valid(it->key().c_str())) { + bloom_filter_vec_[inner_pos].Set(it->key().c_str()); + pk_cnt_vec_[inner_pos]->fetch_add(1, std::memory_order_relaxed); + } return false; } private: std::shared_ptr inner_index_; std::vector>>* idx_cnt_vec_; + std::shared_ptr> pk_cnt_; + BloomFilter* bloom_filter_; }; class AbsoluteTTLFilterFactory : public rocksdb::CompactionFilterFactory { @@ -289,17 +299,20 @@ class AbsoluteTTLFilterFactory : public rocksdb::CompactionFilterFactory { : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec) {} std::unique_ptr CreateCompactionFilter( const rocksdb::CompactionFilter::Context& context) override { - // if (context.is_manual_compaction) { - // return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); - // } - // return std::unique_ptr(new AbsoluteTTLCompactionFilter(inner_index_)); - return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); + if (context.is_manual_compaction) { + return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_, pk_cnt_, bloom_filter_)); + } + return std::unique_ptr(new AbsoluteTTLCompactionFilter(inner_index_)); + // return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); } const char* Name() const override { return "AbsoluteTTLFilterFactory"; } private: std::shared_ptr inner_index_; std::vector>>* idx_cnt_vec_; + std::shared_ptr> pk_cnt_; + BloomFilter* bloom_filter_; + }; class DiskTableIterator : public TableIterator { @@ -493,6 +506,7 @@ class DiskTable : public Table { void SchedGc() override; + void ClearRecord(); void GcHead(); void GcTTL(); void GcTTLAndHead(); diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 825e69a3631..518232c62d6 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2676,8 +2676,6 @@ TEST_P(TableTest, TsIdxCntPut6) { } EXPECT_EQ(100, ts_count); - table->SchedGc(); - table->SchedGc(); table->SchedGc(); EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); From a5108131d794821bfbaa2015a05a9caf48a2d045 Mon Sep 17 00:00:00 2001 From: litongxin Date: Sun, 8 May 2022 17:00:46 +0000 Subject: [PATCH 08/23] add pk_cnt to absolute --- src/storage/disk_table.cc | 12 +++++++----- src/storage/disk_table.h | 24 +++++++++++++++--------- src/storage/table_test.cc | 6 ++++-- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index f18b6113f13..f2e400c01a8 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -151,9 +151,15 @@ bool DiskTable::InitColumnFamilyDescriptor() { cf_ds_.push_back( rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions())); auto inner_indexs = table_index_.GetAllInnerIndex(); - for (const auto& inner_index : *inner_indexs) { + for (uint32_t i = 0; i < inner_indexs->size(); i++) { pk_cnt_vec_.push_back(std::make_shared>(0)); bloom_filter_vec_.push_back(BloomFilter(1000, 100)); + } + auto indexs = table_index_.GetAllIndex(); + for (uint32_t i = 0; i < indexs.size(); i++) { + idx_cnt_vec_.push_back(std::make_shared>(0)); + } + for (const auto& inner_index : *inner_indexs) { rocksdb::ColumnFamilyOptions cfo; if (storage_mode_ == ::openmldb::common::StorageMode::kSSD) { cfo = rocksdb::ColumnFamilyOptions(ssd_option_template); @@ -178,10 +184,6 @@ bool DiskTable::InitColumnFamilyDescriptor() { cf_ds_.push_back(rocksdb::ColumnFamilyDescriptor(index_def->GetName(), cfo)); DEBUGLOG("add cf_name %s. tid %u pid %u", index_def->GetName().c_str(), id_, pid_); } - auto indexs = table_index_.GetAllIndex(); - for (uint32_t i = 0; i < indexs.size(); i++) { - idx_cnt_vec_.push_back(std::make_shared>(0)); - } return true; } diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 5e765635791..0c7ce99b0c5 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -131,7 +131,7 @@ class KeyTSComparator : public rocksdb::Comparator { class BloomFilter { public: - BloomFilter(uint32_t bitset_size, uint32_t string_cnt) : bitset_size_(bitset_size), string_cnt_(string_cnt) { k_ = ceil((bitset_size / string_cnt) * std::log(2)); } + BloomFilter(uint32_t bitset_size, uint32_t string_cnt) : k_(5), bitset_size_(bitset_size), string_cnt_(string_cnt) {} virtual ~BloomFilter() {} uint32_t Hash(const char* str, uint32_t seed); @@ -278,9 +278,15 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { } idx_cnt_vec_->at(idx)->fetch_add(1, std::memory_order_relaxed); uint32_t inner_pos = inner_index_->GetId(); - if (!bloom_filter_vec_[inner_pos].Valid(it->key().c_str())) { - bloom_filter_vec_[inner_pos].Set(it->key().c_str()); - pk_cnt_vec_[inner_pos]->fetch_add(1, std::memory_order_relaxed); + std::string pk; + if (indexs.size() > 1) { + pk.assign(key.data(), key.size() - TS_LEN - TS_POS_LEN); + } else { + pk.assign(key.data(), key.size() - TS_LEN); + } + if (!bloom_filter_->Valid(pk.c_str())) { + bloom_filter_->Set(pk.c_str()); + pk_cnt_->fetch_add(1, std::memory_order_relaxed); } return false; } @@ -295,15 +301,16 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { class AbsoluteTTLFilterFactory : public rocksdb::CompactionFilterFactory { public: explicit AbsoluteTTLFilterFactory(const std::shared_ptr& inner_index, - std::vector>>* idx_cnt_vec) - : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec) {} + std::vector>>* idx_cnt_vec, + std::shared_ptr> pk_cnt, BloomFilter* bloom_filter) + : inner_index_(inner_index), idx_cnt_vec_(idx_cnt_vec), pk_cnt_(pk_cnt), bloom_filter_(bloom_filter) {} std::unique_ptr CreateCompactionFilter( const rocksdb::CompactionFilter::Context& context) override { if (context.is_manual_compaction) { - return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_, pk_cnt_, bloom_filter_)); + return std::unique_ptr( + new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_, pk_cnt_, bloom_filter_)); } return std::unique_ptr(new AbsoluteTTLCompactionFilter(inner_index_)); - // return std::unique_ptr(new AbsoluteTTLAndCountCompactionFilter(inner_index_, idx_cnt_vec_)); } const char* Name() const override { return "AbsoluteTTLFilterFactory"; } @@ -312,7 +319,6 @@ class AbsoluteTTLFilterFactory : public rocksdb::CompactionFilterFactory { std::vector>>* idx_cnt_vec_; std::shared_ptr> pk_cnt_; BloomFilter* bloom_filter_; - }; class DiskTableIterator : public TableIterator { diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 518232c62d6..8f55e9504ee 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2617,8 +2617,8 @@ TEST_P(TableTest, TsIdxCntPut6) { uint64_t ts = now - (99 - i) * 60 * 1000; std::string ts_str = std::to_string(ts); - std::vector row = {"test"+ std::to_string(i), - "testnew"+ std::to_string(i), + std::vector row = {"test"+ std::to_string(i / 10), + "testnew"+ std::to_string(i / 10), ts_str, ts_str, ts_str}; @@ -2657,6 +2657,7 @@ TEST_P(TableTest, TsIdxCntPut6) { EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); uint64_t* stats = NULL; uint32_t size = 0; @@ -2679,6 +2680,7 @@ TEST_P(TableTest, TsIdxCntPut6) { table->SchedGc(); EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); stats = NULL; size = 0; From 9de7c39a2c5038c0a1698a55383dc990b41efa05 Mon Sep 17 00:00:00 2001 From: litongxin Date: Mon, 9 May 2022 08:55:40 +0000 Subject: [PATCH 09/23] add pk_cnt to latest --- src/storage/disk_table.cc | 17 +++++++++++++++++ src/storage/disk_table.h | 8 +++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index f2e400c01a8..fcd7edff890 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -353,6 +353,11 @@ void DiskTable::ClearRecord() { for (const auto& index : indexs) { idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); } + auto inner_indexs = table_index_.GetAllInnerIndex(); + for (const auto& inner_index : *inner_indexs) { + pk_cnt_vec_[inner_index->GetId()]->store(0, std::memory_order_relaxed); + bloom_filter_vec_[inner_index->GetId()].Reset(); + } } void DiskTable::GcHead() { @@ -427,6 +432,10 @@ void DiskTable::GcHead() { idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, std::memory_order_relaxed); } } + if (!bloom_filter_vec_[idx].Valid(cur_pk.c_str())) { + bloom_filter_vec_[idx].Set(cur_pk.c_str()); + pk_cnt_vec_[idx]->fetch_add(1, std::memory_order_relaxed); + } delete_key_map.clear(); key_cnt.clear(); key_cnt.insert(std::make_pair(ts_idx, 1)); @@ -495,6 +504,10 @@ void DiskTable::GcHead() { count = 1; last_pk = cur_pk; it->Next(); + if (!bloom_filter_vec_[idx].Valid(cur_pk.c_str())) { + bloom_filter_vec_[idx].Set(cur_pk.c_str()); + pk_cnt_vec_[idx]->fetch_add(1, std::memory_order_relaxed); + } } } idx_cnt_vec_[index_id]->fetch_add(count); @@ -1319,5 +1332,9 @@ bool BloomFilter::Valid(const char *str) return true; } +void BloomFilter::Reset() { + bit_.reset(); +} + } // namespace storage } // namespace openmldb diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 0c7ce99b0c5..f812ee5d491 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -137,6 +137,7 @@ class BloomFilter { uint32_t Hash(const char* str, uint32_t seed); void Set(const char* str); bool Valid(const char* str); + void Reset(); private: uint32_t k_, bitset_size_, @@ -285,9 +286,10 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { pk.assign(key.data(), key.size() - TS_LEN); } if (!bloom_filter_->Valid(pk.c_str())) { - bloom_filter_->Set(pk.c_str()); - pk_cnt_->fetch_add(1, std::memory_order_relaxed); - } + bloom_filter_->Set(pk.c_str()); + pk_cnt_->fetch_add(1, std::memory_order_relaxed); + PDLOG(ERROR, "adding %s now pk_cnt is %d", pk.c_str(), pk_cnt_->load(std::memory_order_relaxed)); + } return false; } From f4551619feee9022323f9c2f7a4dbb3525b68532 Mon Sep 17 00:00:00 2001 From: litongxin Date: Mon, 9 May 2022 16:20:30 +0000 Subject: [PATCH 10/23] fix abs and lat ttl in one inner_index --- src/storage/disk_table.cc | 42 ++++++++------ src/storage/disk_table.h | 1 - src/storage/table_test.cc | 115 +++++++++++++++++++++++++++++++++++++- 3 files changed, 138 insertions(+), 20 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index fcd7edff890..da8985944d3 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -374,23 +374,23 @@ void DiskTable::GcHead() { it->SeekToFirst(); const auto& indexs = inner_index->GetIndex(); if (indexs.size() > 1) { - bool need_ttl = false; std::map ttl_map; std::map idx_map; + std::set abs_set; for (const auto& index : indexs) { auto ts_col = index->GetTsColumn(); if (ts_col) { auto lat_ttl = index->GetTTL()->lat_ttl; if (lat_ttl > 0) { ttl_map.emplace(ts_col->GetId(), lat_ttl); - need_ttl = true; + } + auto abs_ttl = index->GetTTL()->abs_ttl; + if (abs_ttl > 0) { + abs_set.insert(ts_col->GetId()); } idx_map.emplace(ts_col->GetId(), index->GetId()); } } - if (!need_ttl) { - continue; - } std::map key_cnt; std::map delete_key_map; std::string last_pk; @@ -399,6 +399,10 @@ void DiskTable::GcHead() { uint64_t ts = 0; uint32_t ts_idx = 0; ParseKeyAndTs(true, it->key(), cur_pk, ts, ts_idx); + if (abs_set.find(ts_idx) != abs_set.end()) { + it->Next(); + continue; + } if (!last_pk.empty() && cur_pk == last_pk) { auto key_cnt_iter = key_cnt.find(ts_idx); if (key_cnt_iter == key_cnt.end()) { @@ -432,9 +436,11 @@ void DiskTable::GcHead() { idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, std::memory_order_relaxed); } } - if (!bloom_filter_vec_[idx].Valid(cur_pk.c_str())) { - bloom_filter_vec_[idx].Set(cur_pk.c_str()); - pk_cnt_vec_[idx]->fetch_add(1, std::memory_order_relaxed); + if (key_cnt.size() > 0) { + if (!bloom_filter_vec_[idx].Valid(last_pk.c_str())) { + bloom_filter_vec_[idx].Set(last_pk.c_str()); + pk_cnt_vec_[idx]->fetch_add(1, std::memory_order_relaxed); + } } delete_key_map.clear(); key_cnt.clear(); @@ -452,15 +458,6 @@ void DiskTable::GcHead() { PDLOG(WARNING, "Delete failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); } } - for (const auto& kv : delete_key_map) { - std::string combine_key1 = CombineKeyTs(last_pk, kv.second, kv.first); - std::string combine_key2 = CombineKeyTs(last_pk, 0, kv.first); - rocksdb::Status s = db_->DeleteRange(write_opts_, cf_hs_[idx + 1], rocksdb::Slice(combine_key1), - rocksdb::Slice(combine_key2)); - if (!s.ok()) { - PDLOG(WARNING, "Delete failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); - } - } for (auto ts_idx_iter = key_cnt.begin(); ts_idx_iter != key_cnt.end(); ts_idx_iter++) { auto index_iterator = idx_map.find(ts_idx_iter->first); auto ttl_iter = ttl_map.find(ts_idx_iter->first); @@ -470,14 +467,23 @@ void DiskTable::GcHead() { idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, std::memory_order_relaxed); } } + if (key_cnt.size() > 0) { + if (!bloom_filter_vec_[idx].Valid(last_pk.c_str())) { + bloom_filter_vec_[idx].Set(last_pk.c_str()); + pk_cnt_vec_[idx]->fetch_add(1, std::memory_order_relaxed); + } + } } else { auto index = indexs.front(); uint32_t index_id = index->GetId(); - idx_cnt_vec_[index_id]->store(0, std::memory_order_relaxed); auto ttl_num = index->GetTTL()->lat_ttl; if (ttl_num < 1) { continue; } + auto abs_ttl_num = index->GetTTL()->abs_ttl; + if (abs_ttl_num > 0) { + continue; + } std::string last_pk; uint64_t count = 0; while (it->Valid()) { diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index f812ee5d491..eb9d85901e6 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -288,7 +288,6 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { if (!bloom_filter_->Valid(pk.c_str())) { bloom_filter_->Set(pk.c_str()); pk_cnt_->fetch_add(1, std::memory_order_relaxed); - PDLOG(ERROR, "adding %s now pk_cnt is %d", pk.c_str(), pk_cnt_->load(std::memory_order_relaxed)); } return false; } diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 8f55e9504ee..35ace4dcd58 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2680,7 +2680,7 @@ TEST_P(TableTest, TsIdxCntPut6) { table->SchedGc(); EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); - EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + EXPECT_EQ(14, (int64_t)table->GetRecordPkCnt()); stats = NULL; size = 0; @@ -2867,6 +2867,119 @@ TEST_P(TableTest, TsIdxCntPut7) { delete table; } +TEST_P(TableTest, TsIdxCntPut8) { + ::openmldb::common::StorageMode storageMode = GetParam(); + ::openmldb::api::TableMeta table_meta; + table_meta.set_name("table1"); + std::string table_path = ""; + int id = 1; + if (storageMode == ::openmldb::common::kHDD) { + id = ++counter; + table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); + } + table_meta.set_tid(id); + table_meta.set_pid(1); + table_meta.set_seg_cnt(1); + table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); + table_meta.set_key_entry_max_height(8); + table_meta.set_storage_mode(storageMode); + table_meta.set_format_version(1); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); + SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); + + SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 7); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 4); + + + Table* table = CreateTable(table_meta, table_path); + table->Init(); + codec::SDKCodec codec(table_meta); + uint64_t now = ::baidu::common::timer::get_micros() / 1000; + + for (int i = 0; i < 100; i++) { + uint64_t ts = now - (99 - i) * 60 * 1000; + std::string ts_str = std::to_string(ts); + + std::vector row = {"test"+ std::to_string(i / 10), + "testnew"+ std::to_string(i / 10), + ts_str, + ts_str, + ts_str}; + ::openmldb::api::PutRequest request; + ::openmldb::api::Dimension* dim = request.add_dimensions(); + dim->set_idx(0); + dim->set_key(row[0]); + ::openmldb::api::Dimension* dim1 = request.add_dimensions(); + dim1->set_idx(1); + dim1->set_key(row[1]); + ::openmldb::api::Dimension* dim2 = request.add_dimensions(); + dim2->set_idx(2); + dim2->set_key(row[1]); + std::string value; + ASSERT_EQ(0, codec.EncodeRow(row, &value)); + table->Put(0, value, request.dimensions()); + } + + for (int i = 0; i <= 2; i++) { + TableIterator* it = table->NewTraverseIterator(i); + it->SeekToFirst(); + int count = 0; + while (it->Valid()) { + it->Next(); + count++; + } + + if (i == 0) { + EXPECT_EQ(70, count); + } else if (i == 1) { + EXPECT_EQ(50, count); + } else if (i == 2) { + EXPECT_EQ(40, count); + } + } + + + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); + + table->SchedGc(); + EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); + EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); + + uint64_t* stats = NULL; + uint32_t size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); + int ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(50, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(40, ts_count); + + stats = NULL; + size = 0; + ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); + ts_count = 0; + for (int i = 0; i < size; i++) { + ts_count += stats[i]; + } + EXPECT_EQ(70, ts_count); + + delete table; +} + INSTANTIATE_TEST_CASE_P(TestMemAndHDD, TableTest, ::testing::Values(::openmldb::common::kMemory, ::openmldb::common::kHDD)); From b0181ee2a77af9457b14bc1a5f1077c2eb238066 Mon Sep 17 00:00:00 2001 From: litongxin Date: Tue, 10 May 2022 13:01:29 +0000 Subject: [PATCH 11/23] update BloomFilter --- src/storage/disk_table.cc | 29 ++++++++++++++++++++++------- src/storage/disk_table.h | 16 ++++++++++++---- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index da8985944d3..10276813050 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -1301,9 +1301,8 @@ int DiskTable::GetCount(uint32_t index, const std::string& pk, uint64_t& count) return 0; } -uint32_t BloomFilter::Hash(const char *str, uint32_t seed) +uint32_t BloomFilter::hash(const char *str, uint32_t seed) { - // unsigned int b = 378551; uint a = 63689; uint hash = 0; @@ -1316,12 +1315,26 @@ uint32_t BloomFilter::Hash(const char *str, uint32_t seed) return (hash & 0x7FFFFFFF); } +void BloomFilter::setBit(uint32_t bit) { + uint32_t bits_num = bit / 64; + uint32_t bits_left = bit % 64; + + bits_[bits_num]->fetch_or((uint64_t)1 << bits_left, std::memory_order_relaxed); +} + +bool BloomFilter::getBit(uint32_t bit) { + uint32_t bits_num = bit / 64; + uint32_t bits_left = bit % 64; + + return (bits_[bits_num]->load(std::memory_order_relaxed) >> bits_left) & 1; +} + void BloomFilter::Set(const char *str) { for (int i = 0; i < k_; ++i) { - uint32_t p = Hash(str, base_[i]) % 1000000; - bit_[p] = 1; + uint32_t p = hash(str, base_[i]) % bitset_size_; + setBit(p); } } @@ -1330,8 +1343,8 @@ bool BloomFilter::Valid(const char *str) { for (int i = 0; i < k_; ++i) { - uint32_t p = Hash(str, base_[i]) % 1000000; - if (!bit_[p]) { + uint32_t p = hash(str, base_[i]) % bitset_size_; + if (!getBit(p)) { return false; } } @@ -1339,7 +1352,9 @@ bool BloomFilter::Valid(const char *str) } void BloomFilter::Reset() { - bit_.reset(); + for (uint32_t i = 0; i < bits_.size(); i++) { + bits_[i]->store(0, std::memory_order_relaxed); + } } } // namespace storage diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index eb9d85901e6..aec61ef6edf 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -131,18 +131,26 @@ class KeyTSComparator : public rocksdb::Comparator { class BloomFilter { public: - BloomFilter(uint32_t bitset_size, uint32_t string_cnt) : k_(5), bitset_size_(bitset_size), string_cnt_(string_cnt) {} + BloomFilter(uint32_t bitset_size, uint32_t string_cnt) : k_(5), bitset_size_(bitset_size), string_cnt_(string_cnt) { + bits_.reserve(10000); + for (uint32_t i = 0; i < 10000; i++) { + bits_.push_back(std::make_shared>(0)); + } + } virtual ~BloomFilter() {} - uint32_t Hash(const char* str, uint32_t seed); void Set(const char* str); bool Valid(const char* str); void Reset(); private: + uint32_t hash(const char* str, uint32_t seed); + void setBit(uint32_t bit); + bool getBit(uint32_t bit); + uint32_t k_, bitset_size_, - string_cnt_; // k:number of the hash functions //bitset_size:the size of bitset //string_cnt:number of strings to hash (k = [bitset_size/string_cnt]*ln2) - std::bitset<1<<20> bit_; + string_cnt_; + std::vector>> bits_; uint32_t base_[100] = {5, 7, 11, 13, 31, 37, 61}; }; From 253ede9e03381722e2725e0dbde625419ae4f728 Mon Sep 17 00:00:00 2001 From: litongxin Date: Wed, 11 May 2022 15:49:38 +0000 Subject: [PATCH 12/23] update test --- src/storage/disk_table.cc | 28 +- src/storage/disk_table.h | 4 - src/storage/table_test.cc | 726 +------------------------------------- 3 files changed, 16 insertions(+), 742 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 10276813050..6ac0fdc0971 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -533,7 +533,6 @@ void DiskTable::GcTTL() { PDLOG(WARNING, "Manual Compaction failed"); } } - PDLOG(ERROR, "Manual Compaction Finished"); } void DiskTable::GcTTLOrHead() {} @@ -1196,28 +1195,11 @@ bool DiskTable::DeleteIndex(const std::string& idx_name) { uint64_t DiskTable::GetRecordIdxCnt() { auto inner_indexs = table_index_.GetAllInnerIndex(); - rocksdb::ReadOptions ro = rocksdb::ReadOptions(); - const rocksdb::Snapshot* snapshot = db_->GetSnapshot(); - - // for (size_t i = 0; i < inner_indexs->size(); i++) { - // bool is_valid = false; - // for (const auto& index_def : inner_indexs->at(i)->GetIndex()) { - // if (index_def && index_def->IsReady()) { - // rocksdb::Iterator* it = db_->NewIterator(ro, cf_hs_[i + 1]); - - // for (; it->Valid(); it->Next()) { - // uint32_t cur_ts_idx = UINT32_MAX; - // std::string cur_pk; - // uint64_t cur_ts; - - // ParseKeyAndTs(has_ts_idx, it->key(), cur_pk, cur_ts, cur_ts_idx); - // } - // } - // } - - // } - // TODO(litongxin) - return 0; + uint64_t count = 0; + for (const auto& inner_index : *inner_indexs) { + count += idx_cnt_vec_[inner_index->GetIndex().front()->GetId()]->load(std::memory_order_relaxed); + } + return count; } bool DiskTable::GetRecordIdxCnt(uint32_t idx, uint64_t** stat, uint32_t* size) { diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index aec61ef6edf..424bb6f0670 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -21,7 +21,6 @@ #include #include #include -#include #include "base/endianconv.h" #include "base/slice.h" #include "boost/lexical_cast.hpp" @@ -42,7 +41,6 @@ #include "storage/table.h" #include "base/glog_wapper.h" // NOLINT - namespace openmldb { namespace storage { @@ -180,7 +178,6 @@ class AbsoluteTTLCompactionFilter : public rocksdb::CompactionFilter { bool Filter(int /*level*/, const rocksdb::Slice& key, const rocksdb::Slice& /*existing_value*/, std::string* /*new_value*/, bool* /*value_changed*/) const override { - PDLOG(ERROR, "using compaction filter"); if (key.size() < TS_LEN) { return false; } @@ -555,7 +552,6 @@ class DiskTable : public Table { KeyTSComparator cmp_; std::atomic offset_; std::string table_path_; - std::atomic pk_cnt_; std::vector>> idx_cnt_vec_; std::vector>> pk_cnt_vec_; std::vector bloom_filter_vec_; diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 35ace4dcd58..755f1dadf0d 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -1927,661 +1927,8 @@ TEST_P(TableTest, AbsAndLat) { delete table; } -TEST_P(TableTest, AbsAndLatGC) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsAndLat, 100, 10); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsAndLat, 50, 8); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "test", "ts3", ::openmldb::type::kAbsAndLat, 70, 5); - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test" + std::to_string(i % 10), - "testnew" + std::to_string(i % 10), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 2; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 1) { - ASSERT_EQ(80, count); - } else if (i == 2) { - ASSERT_EQ(70, count); - } else { - ASSERT_EQ(100, count); - } - } - - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - table->SchedGc(); - EXPECT_EQ(180, (int64_t)table->GetRecordIdxCnt()); - - delete table; -} - -TEST_P(TableTest, AbsAndLatGC1) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 10); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 8); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "test", "ts3", ::openmldb::type::kLatestTime, 0, 5); - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test" + std::to_string(i % 10), - "testnew" + std::to_string(i % 10), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 2; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 1) { - ASSERT_EQ(80, count); - } else if (i == 2) { - ASSERT_EQ(50, count); - } else { - ASSERT_EQ(100, count); - } - } - - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - table->SchedGc(); - EXPECT_EQ(180, (int64_t)table->GetRecordIdxCnt()); - - delete table; -} - -TEST_P(TableTest, AbsAndLatGC2) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 70, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test"+ std::to_string(i), - "testnew"+ std::to_string(i), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 1; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 0) { - ASSERT_EQ(70, count); - } else if (i == 1) { - ASSERT_EQ(50, count); - } - } - - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); - - table->SchedGc(); - table->SchedGc(); - EXPECT_EQ(122, (int64_t)table->GetRecordIdxCnt()); - EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); - - delete table; -} - -TEST_P(TableTest, TsIdxCntPut) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 100, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 70, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 60, 0); - - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test"+ std::to_string(i % 10), - "testnew"+ std::to_string(i % 10), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 2; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 0) { - ASSERT_EQ(50, count); - } else if (i == 1) { - ASSERT_EQ(20, count); - } else if (i == 2) { - ASSERT_EQ(10, count); - } - } - - - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); - - table->SchedGc(); - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); - - delete table; -} - -TEST_P(TableTest, TsIdxCntPut2) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 8); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 6); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 5); - - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test"+ std::to_string(i % 10), - "testnew"+ std::to_string(i % 10), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 2; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 0) { - EXPECT_EQ(80, count); - } else if (i == 1) { - EXPECT_EQ(50, count); - } else if (i == 2) { - EXPECT_EQ(60, count); - } - } - - - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - - table->SchedGc(); - EXPECT_EQ(130, (int64_t)table->GetRecordIdxCnt()); - - delete table; -} - -TEST_P(TableTest, TsIdxCntPut3) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 8); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kLatestTime, 0, 6); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 5); - - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test"+ std::to_string(i % 10), - "testnew"+ std::to_string(i % 10), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 2; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 0) { - EXPECT_EQ(80, count); - } else if (i == 1) { - EXPECT_EQ(60, count); - } else if (i == 2) { - EXPECT_EQ(50, count); - } - } - - - EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); - - table->SchedGc(); - EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); - - delete table; -} - -TEST_P(TableTest, TsIdxCntPut4) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 60, 0); - - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test"+ std::to_string(i), - "testnew"+ std::to_string(i), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 2; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 0) { - EXPECT_EQ(90, count); - } else if (i == 1) { - EXPECT_EQ(50, count); - } else if (i == 2) { - EXPECT_EQ(60, count); - } - } - - - EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); - - table->SchedGc(); - table->SchedGc(); - table->SchedGc(); - table->SchedGc(); - EXPECT_EQ(200, (int64_t)table->GetRecordPkCnt()); - - delete table; -} - -TEST_P(TableTest, TsIdxCntPut5) { - ::openmldb::common::StorageMode storageMode = GetParam(); - ::openmldb::api::TableMeta table_meta; - table_meta.set_name("table1"); - std::string table_path = ""; - int id = 1; - if (storageMode == ::openmldb::common::kHDD) { - id = ++counter; - table_path = GetDBPath(FLAGS_hdd_root_path, id, 1); - } - table_meta.set_tid(id); - table_meta.set_pid(1); - table_meta.set_seg_cnt(1); - table_meta.set_mode(::openmldb::api::TableMode::kTableLeader); - table_meta.set_key_entry_max_height(8); - table_meta.set_storage_mode(storageMode); - table_meta.set_format_version(1); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "test", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "testnew", ::openmldb::type::kString); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts1", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts2", ::openmldb::type::kBigInt); - SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); - - SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts1", ::openmldb::type::kAbsoluteTime, 50, 0); - - - Table* table = CreateTable(table_meta, table_path); - table->Init(); - codec::SDKCodec codec(table_meta); - uint64_t now = ::baidu::common::timer::get_micros() / 1000; - - for (int i = 0; i < 100; i++) { - uint64_t ts = now - (99 - i) * 60 * 1000; - std::string ts_str = std::to_string(ts); - - std::vector row = {"test"+ std::to_string(i), - "testnew"+ std::to_string(i), - ts_str, - ts_str, - ts_str}; - ::openmldb::api::PutRequest request; - ::openmldb::api::Dimension* dim = request.add_dimensions(); - dim->set_idx(0); - dim->set_key(row[0]); - ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - dim1->set_idx(1); - dim1->set_key(row[1]); - std::string value; - ASSERT_EQ(0, codec.EncodeRow(row, &value)); - table->Put(0, value, request.dimensions()); - } - - for (int i = 0; i <= 1; i++) { - TableIterator* it = table->NewTraverseIterator(i); - it->SeekToFirst(); - int count = 0; - while (it->Valid()) { - it->Next(); - count++; - } - - if (i == 0) { - EXPECT_EQ(90, count); - } else if (i == 1) { - EXPECT_EQ(50, count); - } else if (i == 2) { - EXPECT_EQ(60, count); - } - } - - - EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - - table->SchedGc(); - EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); - - uint64_t* stats = NULL; - uint32_t size = 0; - ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); - int ts_count = 0; - for (int i = 0; i < size; i++) { - ts_count += stats[i]; - } - EXPECT_EQ(91, ts_count); - - delete table; -} - -TEST_P(TableTest, TsIdxCntPut6) { - ::openmldb::common::StorageMode storageMode = GetParam(); +TEST_F(TableTest, GetRecordAbsTTL) { + ::openmldb::common::StorageMode storageMode = openmldb::common::StorageMode::kHDD; ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; @@ -2679,7 +2026,7 @@ TEST_P(TableTest, TsIdxCntPut6) { table->SchedGc(); - EXPECT_EQ(142, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); EXPECT_EQ(14, (int64_t)table->GetRecordPkCnt()); stats = NULL; @@ -2689,7 +2036,7 @@ TEST_P(TableTest, TsIdxCntPut6) { for (int i = 0; i < size; i++) { ts_count += stats[i]; } - EXPECT_EQ(51, ts_count); + EXPECT_EQ(50, ts_count); stats = NULL; size = 0; @@ -2698,13 +2045,13 @@ TEST_P(TableTest, TsIdxCntPut6) { for (int i = 0; i < size; i++) { ts_count += stats[i]; } - EXPECT_EQ(41, ts_count); + EXPECT_EQ(40, ts_count); delete table; } -TEST_P(TableTest, TsIdxCntPut7) { - ::openmldb::common::StorageMode storageMode = GetParam(); +TEST_F(TableTest, GetRecordLatTTL) { + ::openmldb::common::StorageMode storageMode = openmldb::common::StorageMode::kHDD; ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; @@ -2784,7 +2131,7 @@ TEST_P(TableTest, TsIdxCntPut7) { table->SchedGc(); EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); - EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(120, (int64_t)table->GetRecordIdxCnt()); uint64_t* stats = NULL; uint32_t size = 0; @@ -2813,62 +2160,11 @@ TEST_P(TableTest, TsIdxCntPut7) { } EXPECT_EQ(70, ts_count); - // for (int i = 0; i < 100; i++) { - // uint64_t ts = now - (99 - i) * 60 * 1000; - // std::string ts_str = std::to_string(ts); - - // std::vector row = {"testk"+ std::to_string(i % 10), - // "testknew"+ std::to_string(i % 10), - // ts_str, - // ts_str, - // ts_str}; - // ::openmldb::api::PutRequest request; - // ::openmldb::api::Dimension* dim = request.add_dimensions(); - // dim->set_idx(0); - // dim->set_key(row[0]); - // ::openmldb::api::Dimension* dim1 = request.add_dimensions(); - // dim1->set_idx(1); - // dim1->set_key(row[1]); - // ::openmldb::api::Dimension* dim2 = request.add_dimensions(); - // dim2->set_idx(2); - // dim2->set_key(row[1]); - // std::string value; - // ASSERT_EQ(0, codec.EncodeRow(row, &value)); - // table->Put(0, value, request.dimensions()); - // } - - // stats = NULL; - // size = 0; - // ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); - // ts_count = 0; - // for (int i = 0; i < size; i++) { - // ts_count += stats[i]; - // } - // EXPECT_EQ(150, ts_count); - - // stats = NULL; - // size = 0; - // ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); - // ts_count = 0; - // for (int i = 0; i < size; i++) { - // ts_count += stats[i]; - // } - // EXPECT_EQ(140, ts_count); - - // stats = NULL; - // size = 0; - // ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); - // ts_count = 0; - // for (int i = 0; i < size; i++) { - // ts_count += stats[i]; - // } - // EXPECT_EQ(170, ts_count); - delete table; } -TEST_P(TableTest, TsIdxCntPut8) { - ::openmldb::common::StorageMode storageMode = GetParam(); +TEST_F(TableTest, GetRecordAbsAndLatTTL) { + ::openmldb::common::StorageMode storageMode = openmldb::common::StorageMode::kHDD; ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; @@ -2948,7 +2244,7 @@ TEST_P(TableTest, TsIdxCntPut8) { table->SchedGc(); EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); - EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(120, (int64_t)table->GetRecordIdxCnt()); uint64_t* stats = NULL; uint32_t size = 0; From b9a69143f2440d15f9ebdb394ef02e0ccd62f332 Mon Sep 17 00:00:00 2001 From: litongxin Date: Wed, 11 May 2022 19:03:23 +0000 Subject: [PATCH 13/23] add idxcnt in delete --- src/storage/disk_table.cc | 32 ++++++++++++++++++++++++++++++++ src/storage/table_test.cc | 6 ++++++ 2 files changed, 38 insertions(+) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 6ac0fdc0971..bad3add4b34 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -299,6 +299,13 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { return false; } auto inner_index = table_index_.GetInnerIndex(index_def->GetInnerPos()); + rocksdb::ReadOptions ro = rocksdb::ReadOptions(); + const rocksdb::Snapshot* snapshot = db_->GetSnapshot(); + ro.snapshot = snapshot; + ro.pin_data = true; + rocksdb::Iterator* it = db_->NewIterator(ro, cf_hs_[index_def->GetInnerPos() + 1]); + std::map delete_idx_cnt; + if (inner_index && inner_index->GetIndex().size() > 1) { const auto& indexs = inner_index->GetIndex(); for (const auto& index : indexs) { @@ -308,16 +315,41 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { } std::string combine_key1 = CombineKeyTs(pk, UINT64_MAX, ts_col->GetId()); std::string combine_key2 = CombineKeyTs(pk, 0, ts_col->GetId()); + it->Seek(rocksdb::Slice(combine_key1)); + while (it->Valid() && it->key().compare(rocksdb::Slice(combine_key2)) != 0) { + if (delete_idx_cnt.find(index->GetId()) != delete_idx_cnt.end()) { + delete_idx_cnt[index->GetId()]++; + } else { + delete_idx_cnt.emplace(index->GetId(), 1); + } + it->Next(); + } batch.DeleteRange(cf_hs_[idx + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); } } else { std::string combine_key1 = CombineKeyTs(pk, UINT64_MAX); std::string combine_key2 = CombineKeyTs(pk, 0); + it->Seek(rocksdb::Slice(combine_key1)); + const auto& index = inner_index->GetIndex().front(); + while (it->Valid() && it->key().compare(rocksdb::Slice(combine_key2)) != 0) { + if (delete_idx_cnt.find(index->GetId()) != delete_idx_cnt.end()) { + delete_idx_cnt[index->GetId()]++; + } else { + delete_idx_cnt.emplace(index->GetId(), 1); + } + it->Next(); + } batch.DeleteRange(cf_hs_[idx + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); } rocksdb::Status s = db_->Write(write_opts_, &batch); if (s.ok()) { offset_.fetch_add(1, std::memory_order_relaxed); + for (auto ts_idx_iter = delete_idx_cnt.begin(); ts_idx_iter != delete_idx_cnt.end(); ts_idx_iter++) { + idx_cnt_vec_[ts_idx_iter->first]->fetch_sub(ts_idx_iter->second, std::memory_order_relaxed); + } + if (delete_idx_cnt.size() > 0) { + pk_cnt_vec_[index_def->GetInnerPos()]->fetch_sub(1); + } return true; } else { DEBUGLOG("Delete failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 755f1dadf0d..b5fa6170416 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2160,6 +2160,12 @@ TEST_F(TableTest, GetRecordLatTTL) { } EXPECT_EQ(70, ts_count); + table->Delete("test0", 0); + table->Delete("testnew0", 1); + EXPECT_EQ(18, (int64_t)table->GetRecordPkCnt()); + EXPECT_EQ(114, (int64_t)table->GetRecordIdxCnt()); + + delete table; } From 31e602ad31c1ea8cd827256f6ca86d209e4abf9e Mon Sep 17 00:00:00 2001 From: litongxin Date: Thu, 12 May 2022 13:34:39 +0000 Subject: [PATCH 14/23] update delete and reformat --- src/storage/disk_table.cc | 27 ++++++++++++++++++++------- src/storage/disk_table.h | 1 - src/storage/table_test.cc | 22 +++++++++++----------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index bad3add4b34..d930be59a23 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -316,7 +316,14 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { std::string combine_key1 = CombineKeyTs(pk, UINT64_MAX, ts_col->GetId()); std::string combine_key2 = CombineKeyTs(pk, 0, ts_col->GetId()); it->Seek(rocksdb::Slice(combine_key1)); - while (it->Valid() && it->key().compare(rocksdb::Slice(combine_key2)) != 0) { + while (it->Valid()) { + std::string cur_pk; + uint64_t cur_ts; + uint32_t cur_ts_idx; + ParseKeyAndTs(true, it->key(), cur_pk, cur_ts, cur_ts_idx); + if (pk.compare(cur_pk) != 0 || cur_ts_idx != ts_col->GetId()) { + break; + } if (delete_idx_cnt.find(index->GetId()) != delete_idx_cnt.end()) { delete_idx_cnt[index->GetId()]++; } else { @@ -324,14 +331,20 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { } it->Next(); } - batch.DeleteRange(cf_hs_[idx + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); + batch.DeleteRange(cf_hs_[index_def->GetInnerPos() + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); } } else { std::string combine_key1 = CombineKeyTs(pk, UINT64_MAX); std::string combine_key2 = CombineKeyTs(pk, 0); it->Seek(rocksdb::Slice(combine_key1)); const auto& index = inner_index->GetIndex().front(); - while (it->Valid() && it->key().compare(rocksdb::Slice(combine_key2)) != 0) { + while (it->Valid()) { + std::string cur_pk; + uint64_t cur_ts; + ParseKeyAndTs(it->key(), cur_pk, cur_ts); + if (pk.compare(cur_pk) != 0) { + break; + } if (delete_idx_cnt.find(index->GetId()) != delete_idx_cnt.end()) { delete_idx_cnt[index->GetId()]++; } else { @@ -339,7 +352,7 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { } it->Next(); } - batch.DeleteRange(cf_hs_[idx + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); + rocksdb::Status s = batch.DeleteRange(cf_hs_[index_def->GetInnerPos() + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); } rocksdb::Status s = db_->Write(write_opts_, &batch); if (s.ok()) { @@ -559,7 +572,7 @@ void DiskTable::GcHead() { void DiskTable::GcTTL() { auto inner_indexs = table_index_.GetAllInnerIndex(); - for (int i = 0; i < inner_indexs->size(); i++) { + for (uint32_t i = 0; i < inner_indexs->size(); i++) { auto s = db_->CompactRange(rocksdb::CompactRangeOptions(), cf_hs_[i + 1], nullptr, nullptr); if (!s.ok()) { PDLOG(WARNING, "Manual Compaction failed"); @@ -1345,7 +1358,7 @@ bool BloomFilter::getBit(uint32_t bit) { void BloomFilter::Set(const char *str) { - for (int i = 0; i < k_; ++i) + for (uint32_t i = 0; i < k_; ++i) { uint32_t p = hash(str, base_[i]) % bitset_size_; setBit(p); @@ -1355,7 +1368,7 @@ void BloomFilter::Set(const char *str) bool BloomFilter::Valid(const char *str) { - for (int i = 0; i < k_; ++i) + for (uint32_t i = 0; i < k_; ++i) { uint32_t p = hash(str, base_[i]) % bitset_size_; if (!getBit(p)) { diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 424bb6f0670..e4da82be6a9 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -283,7 +283,6 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { return true; } idx_cnt_vec_->at(idx)->fetch_add(1, std::memory_order_relaxed); - uint32_t inner_pos = inner_index_->GetId(); std::string pk; if (indexs.size() > 1) { pk.assign(key.data(), key.size() - TS_LEN - TS_POS_LEN); diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index b5fa6170416..b4fb4785d0c 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2010,7 +2010,7 @@ TEST_F(TableTest, GetRecordAbsTTL) { uint32_t size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); int ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(100, ts_count); @@ -2019,7 +2019,7 @@ TEST_F(TableTest, GetRecordAbsTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(100, ts_count); @@ -2033,7 +2033,7 @@ TEST_F(TableTest, GetRecordAbsTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(50, ts_count); @@ -2042,7 +2042,7 @@ TEST_F(TableTest, GetRecordAbsTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(40, ts_count); @@ -2137,7 +2137,7 @@ TEST_F(TableTest, GetRecordLatTTL) { uint32_t size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); int ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(50, ts_count); @@ -2146,7 +2146,7 @@ TEST_F(TableTest, GetRecordLatTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(40, ts_count); @@ -2155,7 +2155,7 @@ TEST_F(TableTest, GetRecordLatTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(70, ts_count); @@ -2163,7 +2163,7 @@ TEST_F(TableTest, GetRecordLatTTL) { table->Delete("test0", 0); table->Delete("testnew0", 1); EXPECT_EQ(18, (int64_t)table->GetRecordPkCnt()); - EXPECT_EQ(114, (int64_t)table->GetRecordIdxCnt()); + EXPECT_EQ(108, (int64_t)table->GetRecordIdxCnt()); delete table; @@ -2256,7 +2256,7 @@ TEST_F(TableTest, GetRecordAbsAndLatTTL) { uint32_t size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(1, &stats, &size)); int ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(50, ts_count); @@ -2265,7 +2265,7 @@ TEST_F(TableTest, GetRecordAbsAndLatTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(2, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(40, ts_count); @@ -2274,7 +2274,7 @@ TEST_F(TableTest, GetRecordAbsAndLatTTL) { size = 0; ASSERT_TRUE(table->GetRecordIdxCnt(0, &stats, &size)); ts_count = 0; - for (int i = 0; i < size; i++) { + for (uint32_t i = 0; i < size; i++) { ts_count += stats[i]; } EXPECT_EQ(70, ts_count); From 4ffa37c312a807dd4a50ade54d774aa5628af878 Mon Sep 17 00:00:00 2001 From: litongxin Date: Thu, 12 May 2022 15:23:18 +0000 Subject: [PATCH 15/23] remove whitespace --- src/storage/disk_table.cc | 2 +- src/storage/disk_table.h | 2 +- src/storage/table_test.cc | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 5199c4be3f8..28a7bdcbefd 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -225,7 +225,7 @@ bool DiskTable::Put(const std::string& pk, uint64_t time, const char* data, uint if (!bloom_filter_vec_[0].Valid(pk.c_str())) { bloom_filter_vec_[0].Set(pk.c_str()); pk_cnt_vec_[0]->fetch_add(1, std::memory_order_relaxed); - } + } return true; } else { DEBUGLOG("Put failed. tid %u pid %u msg %s", id_, pid_, s.ToString().c_str()); diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 9b898f8b660..a66a7b4a144 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -147,7 +147,7 @@ class BloomFilter { bool getBit(uint32_t bit); uint32_t k_, bitset_size_, - string_cnt_; + string_cnt_; std::vector>> bits_; uint32_t base_[100] = {5, 7, 11, 13, 31, 37, 61}; }; diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index b4fb4785d0c..33fa361a542 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -2023,7 +2023,7 @@ TEST_F(TableTest, GetRecordAbsTTL) { ts_count += stats[i]; } EXPECT_EQ(100, ts_count); - + table->SchedGc(); EXPECT_EQ(140, (int64_t)table->GetRecordIdxCnt()); @@ -2128,7 +2128,7 @@ TEST_F(TableTest, GetRecordLatTTL) { EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - + table->SchedGc(); EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); EXPECT_EQ(120, (int64_t)table->GetRecordIdxCnt()); @@ -2247,7 +2247,7 @@ TEST_F(TableTest, GetRecordAbsAndLatTTL) { EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); EXPECT_EQ(200, (int64_t)table->GetRecordIdxCnt()); - + table->SchedGc(); EXPECT_EQ(20, (int64_t)table->GetRecordPkCnt()); EXPECT_EQ(120, (int64_t)table->GetRecordIdxCnt()); From ae9994339bf30ba5616ff57e7339cffe170732e9 Mon Sep 17 00:00:00 2001 From: litongxin Date: Thu, 12 May 2022 16:16:01 +0000 Subject: [PATCH 16/23] fix reviewDog --- src/storage/disk_table.cc | 16 +++++++++++----- src/storage/table_test.cc | 10 ++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 28a7bdcbefd..f17bf6f4d01 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -16,6 +16,7 @@ #include "storage/disk_table.h" #include +#include #include "base/file_util.h" #include "base/glog_wapper.h" // NOLINT #include "base/hash.h" @@ -331,7 +332,8 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { } it->Next(); } - batch.DeleteRange(cf_hs_[index_def->GetInnerPos() + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); + batch.DeleteRange(cf_hs_[index_def->GetInnerPos() + 1], rocksdb::Slice(combine_key1), + rocksdb::Slice(combine_key2)); } } else { std::string combine_key1 = CombineKeyTs(pk, UINT64_MAX); @@ -352,7 +354,8 @@ bool DiskTable::Delete(const std::string& pk, uint32_t idx) { } it->Next(); } - rocksdb::Status s = batch.DeleteRange(cf_hs_[index_def->GetInnerPos() + 1], rocksdb::Slice(combine_key1), rocksdb::Slice(combine_key2)); + rocksdb::Status s = batch.DeleteRange(cf_hs_[index_def->GetInnerPos() + 1], rocksdb::Slice(combine_key1), + rocksdb::Slice(combine_key2)); } rocksdb::Status s = db_->Write(write_opts_, &batch); if (s.ok()) { @@ -475,10 +478,13 @@ void DiskTable::GcHead() { for (auto ts_idx_iter = key_cnt.begin(); ts_idx_iter != key_cnt.end(); ts_idx_iter++) { auto index_iterator = idx_map.find(ts_idx_iter->first); auto ttl_iter = ttl_map.find(ts_idx_iter->first); - if (ttl_iter != ttl_map.end() && ttl_iter->second > 0 && ts_idx_iter->second <= ttl_iter->second) { - idx_cnt_vec_[index_iterator->second]->fetch_add(ts_idx_iter->second, std::memory_order_relaxed); + if (ttl_iter != ttl_map.end() && ttl_iter->second > 0 && + ts_idx_iter->second <= ttl_iter->second) { + idx_cnt_vec_[index_iterator->second]->fetch_add(ts_idx_iter->second, + std::memory_order_relaxed); } else { - idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, std::memory_order_relaxed); + idx_cnt_vec_[index_iterator->second]->fetch_add(ttl_iter->second, + std::memory_order_relaxed); } } if (key_cnt.size() > 0) { diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 33fa361a542..c3edb8a9bb0 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -1951,9 +1951,10 @@ TEST_F(TableTest, GetRecordAbsTTL) { SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kAbsoluteTime, 90, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 40, 0); - + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, + 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kAbsoluteTime, 40, + 0); Table* table = CreateTable(table_meta, table_path); table->Init(); @@ -2193,7 +2194,8 @@ TEST_F(TableTest, GetRecordAbsAndLatTTL) { SchemaCodec::SetColumnDesc(table_meta.add_column_desc(), "ts3", ::openmldb::type::kBigInt); SchemaCodec::SetIndex(table_meta.add_column_key(), "index0", "test", "ts1", ::openmldb::type::kLatestTime, 0, 7); - SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, 0); + SchemaCodec::SetIndex(table_meta.add_column_key(), "index1", "testnew", "ts2", ::openmldb::type::kAbsoluteTime, 50, + 0); SchemaCodec::SetIndex(table_meta.add_column_key(), "index2", "testnew", "ts3", ::openmldb::type::kLatestTime, 0, 4); From da374a9d94dd9bb08c6c19451c906046d088c90a Mon Sep 17 00:00:00 2001 From: litongxin Date: Thu, 12 May 2022 16:51:53 +0000 Subject: [PATCH 17/23] fix reviewDog --- src/storage/disk_table.cc | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index f17bf6f4d01..5a1bec9abbc 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -1330,17 +1330,14 @@ int DiskTable::GetCount(uint32_t index, const std::string& pk, uint64_t& count) break; } } - return 0; } -uint32_t BloomFilter::hash(const char *str, uint32_t seed) -{ +uint32_t BloomFilter::hash(const char* str, uint32_t seed) { uint a = 63689; uint hash = 0; - while (*str) - { + while (*str) { hash = hash * a + (*str++); a *= seed; } @@ -1362,20 +1359,15 @@ bool BloomFilter::getBit(uint32_t bit) { return (bits_[bits_num]->load(std::memory_order_relaxed) >> bits_left) & 1; } -void BloomFilter::Set(const char *str) -{ - for (uint32_t i = 0; i < k_; ++i) - { +void BloomFilter::Set(const char* str) { + for (uint32_t i = 0; i < k_; ++i) { uint32_t p = hash(str, base_[i]) % bitset_size_; setBit(p); } - } -bool BloomFilter::Valid(const char *str) -{ - for (uint32_t i = 0; i < k_; ++i) - { +bool BloomFilter::Valid(const char* str) { + for (uint32_t i = 0; i < k_; ++i) { uint32_t p = hash(str, base_[i]) % bitset_size_; if (!getBit(p)) { return false; From 3cd7faca6b4b0a3308b86f8a577f25dfbc3491e6 Mon Sep 17 00:00:00 2001 From: litongxin Date: Fri, 13 May 2022 14:29:58 +0000 Subject: [PATCH 18/23] deal with AbsAndLat and AbsOrLat --- src/storage/disk_table.cc | 13 ++++++------- src/storage/disk_table.h | 3 +++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 5a1bec9abbc..b419e1c09cf 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -173,8 +173,7 @@ bool DiskTable::InitColumnFamilyDescriptor() { cfo.prefix_extractor.reset(new KeyTsPrefixTransform()); const auto& indexs = inner_index->GetIndex(); for (const auto& index_def : indexs) { - if (index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsoluteTime || - index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsOrLat) { + if (index_def->GetTTLType() == ::openmldb::storage::TTLType::kAbsoluteTime) { cfo.compaction_filter_factory = std::make_shared( inner_index, &idx_cnt_vec_, pk_cnt_vec_[inner_index->GetId()], &bloom_filter_vec_[inner_index->GetId()]); @@ -424,7 +423,7 @@ void DiskTable::GcHead() { if (indexs.size() > 1) { std::map ttl_map; std::map idx_map; - std::set abs_set; + std::set other_TTL_set; for (const auto& index : indexs) { auto ts_col = index->GetTsColumn(); if (ts_col) { @@ -432,9 +431,9 @@ void DiskTable::GcHead() { if (lat_ttl > 0) { ttl_map.emplace(ts_col->GetId(), lat_ttl); } - auto abs_ttl = index->GetTTL()->abs_ttl; - if (abs_ttl > 0) { - abs_set.insert(ts_col->GetId()); + auto TTL_type = index->GetTTLType(); + if (TTL_type != openmldb::storage::TTLType::kLatestTime) { + other_TTL_set.insert(ts_col->GetId()); } idx_map.emplace(ts_col->GetId(), index->GetId()); } @@ -447,7 +446,7 @@ void DiskTable::GcHead() { uint64_t ts = 0; uint32_t ts_idx = 0; ParseKeyAndTs(true, it->key(), cur_pk, ts, ts_idx); - if (abs_set.find(ts_idx) != abs_set.end()) { + if (other_TTL_set.find(ts_idx) != other_TTL_set.end()) { it->Next(); continue; } diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index a66a7b4a144..033059bed90 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -259,6 +259,9 @@ class AbsoluteTTLAndCountCompactionFilter : public rocksdb::CompactionFilter { return false; } if (ts_col->GetId() == ts_idx) { + if (index->GetTTLType() != ::openmldb::storage::TTLType::kAbsoluteTime) { + return false; + } real_ttl = index->GetTTL()->abs_ttl; idx = index->GetId(); has_found = true; From 889b49971c67dad5d609c55c23d3ab93da97d7d4 Mon Sep 17 00:00:00 2001 From: litongxin Date: Wed, 29 Jun 2022 19:01:00 +0000 Subject: [PATCH 19/23] use gflags in bloom_filter --- src/flags.cc | 3 +++ src/storage/disk_table.cc | 13 ++++++++----- src/storage/disk_table.h | 12 ++++++------ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/flags.cc b/src/flags.cc index db6c07ccf66..7ecc140f1e7 100644 --- a/src/flags.cc +++ b/src/flags.cc @@ -121,6 +121,9 @@ DEFINE_int32(snapshot_pool_size, 1, "the size of tablet thread pool for making s DEFINE_uint32(load_index_max_wait_time, 120 * 60 * 1000, "config the max wait time of load index"); +DEFINE_uint32(bloom_filter_bitset_size, 10000, "config the size of bitset in bloom filter"); +DEFINE_uint32(bloom_filter_hash_seed, 7, "config the count of hash seed in bloom filter, max 7"); + DEFINE_string(recycle_bin_root_path, "/tmp/recycle", "specify the root path of recycle bin"); DEFINE_string(recycle_bin_ssd_root_path, "", "specify the root path of recycle bin in ssd"); DEFINE_string(recycle_bin_hdd_root_path, "", "specify the root path of recycle bin in hdd"); diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index b419e1c09cf..8058565b6cf 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -31,6 +31,9 @@ DECLARE_uint32(write_buffer_mb); DECLARE_uint32(block_cache_shardbits); DECLARE_bool(verify_compression); +DECLARE_uint32(bloom_filter_bitset_size); +DECLARE_uint32(bloom_filter_hash_seed); + namespace openmldb { namespace storage { @@ -154,7 +157,7 @@ bool DiskTable::InitColumnFamilyDescriptor() { auto inner_indexs = table_index_.GetAllInnerIndex(); for (uint32_t i = 0; i < inner_indexs->size(); i++) { pk_cnt_vec_.push_back(std::make_shared>(0)); - bloom_filter_vec_.push_back(BloomFilter(1000, 100)); + bloom_filter_vec_.push_back(BloomFilter()); } auto indexs = table_index_.GetAllIndex(); for (uint32_t i = 0; i < indexs.size(); i++) { @@ -1359,15 +1362,15 @@ bool BloomFilter::getBit(uint32_t bit) { } void BloomFilter::Set(const char* str) { - for (uint32_t i = 0; i < k_; ++i) { - uint32_t p = hash(str, base_[i]) % bitset_size_; + for (uint32_t i = 0; i < FLAGS_bloom_filter_hash_seed; ++i) { + uint32_t p = hash(str, base_[i]) % FLAGS_bloom_filter_bitset_size; setBit(p); } } bool BloomFilter::Valid(const char* str) { - for (uint32_t i = 0; i < k_; ++i) { - uint32_t p = hash(str, base_[i]) % bitset_size_; + for (uint32_t i = 0; i < FLAGS_bloom_filter_hash_seed; ++i) { + uint32_t p = hash(str, base_[i]) % FLAGS_bloom_filter_bitset_size; if (!getBit(p)) { return false; } diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index 22535f018b9..cbcc96c0f5c 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -41,6 +41,9 @@ #include "storage/table.h" #include "base/glog_wapper.h" // NOLINT +DECLARE_uint32(bloom_filter_bitset_size); +DECLARE_uint32(bloom_filter_hash_seed); + namespace openmldb { namespace storage { @@ -129,9 +132,8 @@ class KeyTSComparator : public rocksdb::Comparator { class BloomFilter { public: - BloomFilter(uint32_t bitset_size, uint32_t string_cnt) : k_(5), bitset_size_(bitset_size), string_cnt_(string_cnt) { - bits_.reserve(10000); - for (uint32_t i = 0; i < 10000; i++) { + BloomFilter() { + for (uint32_t i = 0; i < FLAGS_bloom_filter_bitset_size; i++) { bits_.push_back(std::make_shared>(0)); } } @@ -146,10 +148,8 @@ class BloomFilter { void setBit(uint32_t bit); bool getBit(uint32_t bit); - uint32_t k_, bitset_size_, - string_cnt_; std::vector>> bits_; - uint32_t base_[100] = {5, 7, 11, 13, 31, 37, 61}; + uint32_t base_[7] = {5, 7, 11, 13, 31, 37, 61}; }; class KeyTsPrefixTransform : public rocksdb::SliceTransform { From 7c8db5f88dcc575b0c2e7d550e5f6532bdb3f5bc Mon Sep 17 00:00:00 2001 From: litongxin Date: Tue, 12 Jul 2022 10:17:24 +0000 Subject: [PATCH 20/23] move back tests about GetRecordCnt --- src/storage/table_test.cc | 145 +++++++++----------------------------- 1 file changed, 34 insertions(+), 111 deletions(-) diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index 39d49a279d7..e352c8bb424 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -200,11 +200,7 @@ TEST_P(TableTest, MultiDimissionPut0) { sdk_codec.EncodeRow({"d0", "d1", "d2"}, &result); bool ok = table->Put(1, result, dimensions); ASSERT_TRUE(ok); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(3, (int64_t)table->GetRecordIdxCnt()); - } + ASSERT_EQ(3, (int64_t)table->GetRecordIdxCnt()); ASSERT_EQ(1, (int64_t)table->GetRecordCnt()); delete table; } @@ -326,11 +322,6 @@ TEST_P(TableTest, Iterator_GetSize) { TEST_P(TableTest, SchedGcHead) { ::openmldb::common::StorageMode storageMode = GetParam(); - // some functions with disktable mode in this test have not been implemented. - // refer to issue #1238 - if (storageMode == openmldb::common::kHDD) { - return; - } std::map mapping; mapping.insert(std::make_pair("idx0", 0)); std::string table_path = ""; @@ -349,12 +340,8 @@ TEST_P(TableTest, SchedGcHead) { value = ::openmldb::test::EncodeKV("test", "test2"); table->Put("test", 1, value.data(), value.size()); ASSERT_EQ(2, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(1, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(1, (int64_t)table->GetRecordPkCnt()); table->SchedGc(); { ::openmldb::api::LogEntry entry; @@ -380,7 +367,9 @@ TEST_P(TableTest, SchedGcHead) { ASSERT_FALSE(table->IsExpire(entry)); } } - ASSERT_EQ(1, (int64_t)table->GetRecordCnt()); + if (storageMode == ::openmldb::common::StorageMode::kMemory) { + ASSERT_EQ(1, (int64_t)table->GetRecordCnt()); + } ASSERT_EQ(1, (int64_t)table->GetRecordIdxCnt()); ASSERT_EQ(bytes, table->GetRecordByteSize()); ASSERT_EQ(record_idx_bytes, table->GetRecordIdxByteSize()); @@ -436,11 +425,6 @@ TEST_P(TableTest, SchedGcHead1) { TEST_P(TableTest, SchedGc) { ::openmldb::common::StorageMode storageMode = GetParam(); - // some functions with disktable mode in this test have not been implemented. - // refer to issue #1238 - if (storageMode == openmldb::common::kHDD) { - return; - } std::map mapping; mapping.insert(std::make_pair("idx0", 0)); std::string table_path = ""; @@ -459,17 +443,13 @@ TEST_P(TableTest, SchedGc) { uint64_t record_idx_bytes = table->GetRecordIdxByteSize(); table->Put("test", 9527, "test", 4); ASSERT_EQ(2, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(1, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(1, (int64_t)table->GetRecordPkCnt()); table->SchedGc(); - ASSERT_EQ(1, (int64_t)table->GetRecordCnt()); - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(1, (int64_t)table->GetRecordIdxCnt()); + if (storageMode == ::openmldb::common::kMemory) { + ASSERT_EQ(1, (int64_t)table->GetRecordCnt()); } + ASSERT_EQ(1, (int64_t)table->GetRecordIdxCnt()); ASSERT_EQ(bytes, table->GetRecordByteSize()); ASSERT_EQ(record_idx_bytes, table->GetRecordIdxByteSize()); @@ -487,11 +467,6 @@ TEST_P(TableTest, SchedGc) { TEST_P(TableTest, TableDataCnt) { ::openmldb::common::StorageMode storageMode = GetParam(); - // some functions with disktable mode in this test have not been implemented. - // refer to issue #1238 - if (storageMode == openmldb::common::kHDD) { - return; - } std::map mapping; mapping.insert(std::make_pair("idx0", 0)); std::string table_path = ""; @@ -508,11 +483,7 @@ TEST_P(TableTest, TableDataCnt) { table->Put("test", 9527, "test", 4); table->Put("test", now, "tes2", 4); ASSERT_EQ((int64_t)table->GetRecordCnt(), 2); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ((int64_t)table->GetRecordIdxCnt(), 2); - } + ASSERT_EQ((int64_t)table->GetRecordIdxCnt(), 2); table->SchedGc(); { ::openmldb::api::LogEntry entry; @@ -1223,11 +1194,6 @@ TEST_P(TableTest, AbsOrLatSetGet) { TEST_P(TableTest, GcAbsOrLat) { ::openmldb::common::StorageMode storageMode = GetParam(); - // some functions with disktable mode in this test have not been implemented. - // refer to issue #1238 - if (storageMode == openmldb::common::kHDD) { - return; - } ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; @@ -1260,44 +1226,30 @@ TEST_P(TableTest, GcAbsOrLat) { table->Put("test2", now - 2 * (60 * 1000) - 1000, "value5", 6); table->Put("test2", now - 1 * (60 * 1000) - 1000, "value6", 6); ASSERT_EQ(7, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(7, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(7, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); ::openmldb::storage::UpdateTTLMeta update_ttl( ::openmldb::storage::TTLSt(3 * 60 * 1000, 0, ::openmldb::storage::kAbsOrLat)); table->SetTTL(update_ttl); table->SchedGc(); - ASSERT_EQ(5, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(5, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); + if (storageMode == openmldb::common::kMemory) { + ASSERT_EQ(5, (int64_t)table->GetRecordCnt()); } + ASSERT_EQ(5, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); update_ttl = ::openmldb::storage::UpdateTTLMeta(::openmldb::storage::TTLSt(0, 1, ::openmldb::storage::kAbsOrLat)); table->SetTTL(update_ttl); table->SchedGc(); ASSERT_EQ(4, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(4, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(4, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); update_ttl = ::openmldb::storage::UpdateTTLMeta( ::openmldb::storage::TTLSt(1 * 60 * 1000, 1, ::openmldb::storage::kAbsOrLat)); table->SetTTL(update_ttl); table->SchedGc(); ASSERT_EQ(2, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); { ::openmldb::api::LogEntry entry; entry.set_log_index(0); @@ -1348,12 +1300,8 @@ TEST_P(TableTest, GcAbsOrLat) { } table->SchedGc(); ASSERT_EQ(0, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(0, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(0, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); { ::openmldb::api::LogEntry entry; entry.set_log_index(0); @@ -1374,11 +1322,6 @@ TEST_P(TableTest, GcAbsOrLat) { TEST_P(TableTest, GcAbsAndLat) { ::openmldb::common::StorageMode storageMode = GetParam(); - // some functions with disktable mode in this test have not been implemented. - // refer to issue #1238 - if (storageMode == openmldb::common::kHDD) { - return; - } ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; @@ -1411,23 +1354,15 @@ TEST_P(TableTest, GcAbsAndLat) { table->Put("test2", now - 3 * (60 * 1000) - 1000, "value5", 6); table->Put("test2", now - 2 * (60 * 1000) - 1000, "value6", 6); ASSERT_EQ(7, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(7, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(7, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); ::openmldb::storage::UpdateTTLMeta update_ttl( ::openmldb::storage::TTLSt(1 * 60 * 1000, 0, ::openmldb::storage::kAbsAndLat)); table->SetTTL(update_ttl); table->SchedGc(); ASSERT_EQ(6, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(6, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(6, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); { ::openmldb::api::LogEntry entry; entry.set_log_index(0); @@ -1468,31 +1403,19 @@ TEST_P(TableTest, GcAbsAndLat) { table->SetTTL(update_ttl); table->SchedGc(); ASSERT_EQ(6, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(6, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(6, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); update_ttl = ::openmldb::storage::UpdateTTLMeta( ::openmldb::storage::TTLSt(1 * 60 * 1000, 1, ::openmldb::storage::kAbsAndLat)); table->SetTTL(update_ttl); table->SchedGc(); ASSERT_EQ(6, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(6, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(6, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); table->SchedGc(); ASSERT_EQ(2, (int64_t)table->GetRecordCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - if (storageMode == ::openmldb::common::StorageMode::kMemory) { - ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); - ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); - } + ASSERT_EQ(2, (int64_t)table->GetRecordIdxCnt()); + ASSERT_EQ(2, (int64_t)table->GetRecordPkCnt()); { ::openmldb::api::LogEntry entry; entry.set_log_index(0); From 933750c0b41884999bbe56e1030cb169eb98fd49 Mon Sep 17 00:00:00 2001 From: litongxin Date: Tue, 12 Jul 2022 14:49:41 +0000 Subject: [PATCH 21/23] skip 2 tests --- src/storage/table_test.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/storage/table_test.cc b/src/storage/table_test.cc index e352c8bb424..f17f7ef9cc0 100644 --- a/src/storage/table_test.cc +++ b/src/storage/table_test.cc @@ -1194,6 +1194,11 @@ TEST_P(TableTest, AbsOrLatSetGet) { TEST_P(TableTest, GcAbsOrLat) { ::openmldb::common::StorageMode storageMode = GetParam(); + // RecordIdxCnt in disktable only support abs TTL and lat TTL + if (storageMode != openmldb::common::kMemory) { + GTEST_SKIP(); + } + ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; @@ -1322,6 +1327,11 @@ TEST_P(TableTest, GcAbsOrLat) { TEST_P(TableTest, GcAbsAndLat) { ::openmldb::common::StorageMode storageMode = GetParam(); + // RecordIdxCnt in disktable only support abs TTL and lat TTL + if (storageMode != openmldb::common::kMemory) { + GTEST_SKIP(); + } + ::openmldb::api::TableMeta table_meta; table_meta.set_name("table1"); std::string table_path = ""; From 58dc383b46e2b578e34ca28b0cb104edbef55122 Mon Sep 17 00:00:00 2001 From: litongxin Date: Tue, 12 Jul 2022 16:28:21 +0000 Subject: [PATCH 22/23] move back tests in disk_table_test --- src/storage/disk_table_test.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/storage/disk_table_test.cc b/src/storage/disk_table_test.cc index dcaaf94d13a..e5f8c080538 100644 --- a/src/storage/disk_table_test.cc +++ b/src/storage/disk_table_test.cc @@ -120,10 +120,8 @@ TEST_F(DiskTableTest, MultiDimensionPut) { ::openmldb::common::StorageMode::kHDD, table_path); ASSERT_TRUE(table->Init()); ASSERT_EQ(3, (int64_t)table->GetIdxCnt()); - // some functions in disk table need to be implemented. - // refer to issue #1238 - // ASSERT_EQ(0, table->GetRecordIdxCnt()); - // ASSERT_EQ(0, table->GetRecordCnt()); + ASSERT_EQ(0, table->GetRecordIdxCnt()); + ASSERT_EQ(0, table->GetRecordCnt()); auto meta = ::openmldb::test::GetTableMeta({"idx0", "idx1", "idx2"}); ::openmldb::codec::SDKCodec sdk_codec(meta); @@ -143,9 +141,7 @@ TEST_F(DiskTableTest, MultiDimensionPut) { ASSERT_EQ(0, sdk_codec.EncodeRow(row, &value)); bool ok = table->Put(1, value, dimensions); ASSERT_TRUE(ok); - // some functions in disk table need to be implemented. - // refer to issue #1238 - // ASSERT_EQ(3, table->GetRecordIdxCnt()); + ASSERT_EQ(3, table->GetRecordIdxCnt()); Ticket ticket; TableIterator* it = table->NewIterator(0, "yjdim0", ticket); From 40f20e3aeb3e70c6e3bc868814c062174c9b7bef Mon Sep 17 00:00:00 2001 From: litongxin Date: Thu, 21 Jul 2022 13:35:36 +0000 Subject: [PATCH 23/23] rename some functions and methods --- src/flags.cc | 4 ++-- src/storage/disk_table.cc | 31 ++++++++++++++----------------- src/storage/disk_table.h | 14 +++++++------- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/flags.cc b/src/flags.cc index 772ee750ead..9635c73f2f6 100644 --- a/src/flags.cc +++ b/src/flags.cc @@ -121,8 +121,8 @@ DEFINE_int32(snapshot_pool_size, 1, "the size of tablet thread pool for making s DEFINE_uint32(load_index_max_wait_time, 120 * 60 * 1000, "config the max wait time of load index"); -DEFINE_uint32(bloom_filter_bitset_size, 10000, "config the size of bitset in bloom filter"); -DEFINE_uint32(bloom_filter_hash_seed, 7, "config the count of hash seed in bloom filter, max 7"); +DEFINE_uint32(disk_stat_bloom_filter_bitset_size, 10000, "config the size of bitset in bloom filter"); +DEFINE_uint32(disk_stat_bloom_filter_hash_seed, 7, "config the count of hash seed in bloom filter, max 7"); DEFINE_string(recycle_bin_root_path, "/tmp/recycle", "specify the root path of recycle bin"); DEFINE_string(recycle_bin_ssd_root_path, "", "specify the root path of recycle bin in ssd"); diff --git a/src/storage/disk_table.cc b/src/storage/disk_table.cc index 1d2bf572ba1..649653127f0 100644 --- a/src/storage/disk_table.cc +++ b/src/storage/disk_table.cc @@ -31,9 +31,6 @@ DECLARE_uint32(write_buffer_mb); DECLARE_uint32(block_cache_shardbits); DECLARE_bool(verify_compression); -DECLARE_uint32(bloom_filter_bitset_size); -DECLARE_uint32(bloom_filter_hash_seed); - namespace openmldb { namespace storage { @@ -392,13 +389,13 @@ bool DiskTable::Get(uint32_t idx, const std::string& pk, uint64_t ts, std::strin bool DiskTable::Get(const std::string& pk, uint64_t ts, std::string& value) { return Get(0, pk, ts, value); } void DiskTable::SchedGc() { - ClearRecord(); + ResetRecordCnt(); GcHead(); GcTTL(); UpdateTTL(); } -void DiskTable::ClearRecord() { +void DiskTable::ResetRecordCnt() { auto indexs = table_index_.GetAllIndex(); for (const auto& index : indexs) { idx_cnt_vec_[index->GetId()]->store(0, std::memory_order_relaxed); @@ -426,7 +423,7 @@ void DiskTable::GcHead() { if (indexs.size() > 1) { std::map ttl_map; std::map idx_map; - std::set other_TTL_set; + std::set other_ttl_set; for (const auto& index : indexs) { auto ts_col = index->GetTsColumn(); if (ts_col) { @@ -436,7 +433,7 @@ void DiskTable::GcHead() { } auto TTL_type = index->GetTTLType(); if (TTL_type != openmldb::storage::TTLType::kLatestTime) { - other_TTL_set.insert(ts_col->GetId()); + other_ttl_set.insert(ts_col->GetId()); } idx_map.emplace(ts_col->GetId(), index->GetId()); } @@ -449,7 +446,7 @@ void DiskTable::GcHead() { uint64_t ts = 0; uint32_t ts_idx = 0; ParseKeyAndTs(true, it->key(), cur_pk, ts, ts_idx); - if (other_TTL_set.find(ts_idx) != other_TTL_set.end()) { + if (other_ttl_set.find(ts_idx) != other_ttl_set.end()) { it->Next(); continue; } @@ -1335,7 +1332,7 @@ int DiskTable::GetCount(uint32_t index, const std::string& pk, uint64_t& count) return 0; } -uint32_t BloomFilter::hash(const char* str, uint32_t seed) { +uint32_t BloomFilter::Hash(const char* str, uint32_t seed) { uint a = 63689; uint hash = 0; @@ -1347,14 +1344,14 @@ uint32_t BloomFilter::hash(const char* str, uint32_t seed) { return (hash & 0x7FFFFFFF); } -void BloomFilter::setBit(uint32_t bit) { +void BloomFilter::SetBit(uint32_t bit) { uint32_t bits_num = bit / 64; uint32_t bits_left = bit % 64; bits_[bits_num]->fetch_or((uint64_t)1 << bits_left, std::memory_order_relaxed); } -bool BloomFilter::getBit(uint32_t bit) { +bool BloomFilter::GetBit(uint32_t bit) { uint32_t bits_num = bit / 64; uint32_t bits_left = bit % 64; @@ -1362,16 +1359,16 @@ bool BloomFilter::getBit(uint32_t bit) { } void BloomFilter::Set(const char* str) { - for (uint32_t i = 0; i < FLAGS_bloom_filter_hash_seed; ++i) { - uint32_t p = hash(str, base_[i]) % FLAGS_bloom_filter_bitset_size; - setBit(p); + for (uint32_t i = 0; i < FLAGS_disk_stat_bloom_filter_hash_seed; ++i) { + uint32_t p = Hash(str, base_[i]) % FLAGS_disk_stat_bloom_filter_bitset_size; + SetBit(p); } } bool BloomFilter::Valid(const char* str) { - for (uint32_t i = 0; i < FLAGS_bloom_filter_hash_seed; ++i) { - uint32_t p = hash(str, base_[i]) % FLAGS_bloom_filter_bitset_size; - if (!getBit(p)) { + for (uint32_t i = 0; i < FLAGS_disk_stat_bloom_filter_hash_seed; ++i) { + uint32_t p = Hash(str, base_[i]) % FLAGS_disk_stat_bloom_filter_bitset_size; + if (!GetBit(p)) { return false; } } diff --git a/src/storage/disk_table.h b/src/storage/disk_table.h index ccd7a591644..e5f6599dabd 100644 --- a/src/storage/disk_table.h +++ b/src/storage/disk_table.h @@ -41,8 +41,8 @@ #include "storage/table.h" #include "base/glog_wapper.h" // NOLINT -DECLARE_uint32(bloom_filter_bitset_size); -DECLARE_uint32(bloom_filter_hash_seed); +DECLARE_uint32(disk_stat_bloom_filter_bitset_size); +DECLARE_uint32(disk_stat_bloom_filter_hash_seed); namespace openmldb { namespace storage { @@ -133,7 +133,7 @@ class KeyTSComparator : public rocksdb::Comparator { class BloomFilter { public: BloomFilter() { - for (uint32_t i = 0; i < FLAGS_bloom_filter_bitset_size; i++) { + for (uint32_t i = 0; i < FLAGS_disk_stat_bloom_filter_bitset_size; i++) { bits_.push_back(std::make_shared>(0)); } } @@ -144,9 +144,9 @@ class BloomFilter { void Reset(); private: - uint32_t hash(const char* str, uint32_t seed); - void setBit(uint32_t bit); - bool getBit(uint32_t bit); + uint32_t Hash(const char* str, uint32_t seed); + void SetBit(uint32_t bit); + bool GetBit(uint32_t bit); std::vector>> bits_; uint32_t base_[7] = {5, 7, 11, 13, 31, 37, 61}; @@ -520,7 +520,7 @@ class DiskTable : public Table { void SchedGc() override; - void ClearRecord(); + void ResetRecordCnt(); void GcHead(); void GcTTL(); void GcTTLAndHead();