Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
mkosieradzki committed Aug 22, 2017
2 parents 85cbbda + 867fe92 commit 15c4017
Show file tree
Hide file tree
Showing 51 changed files with 1,580 additions and 367 deletions.
33 changes: 24 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,26 @@ else()
message(STATUS "JNI library is disabled")
endif()

# Installation and packaging
#
# The install rules are always generated on non-Windows platforms. On Windows
# they are opt-in through the ROCKSDB_INSTALL_ON_WINDOWS option (default OFF).
if(WIN32)
  option(ROCKSDB_INSTALL_ON_WINDOWS "Enable install target on Windows" OFF)
endif()
if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDOWS)
  # Only override the prefix when the user did not pass one explicitly.
  if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
    # Use the bare variable name: if() dereferences it itself, whereas
    # ${CMAKE_SYSTEM_NAME} would be expanded first and is a syntax error when
    # the variable is empty (and may be dereferenced a second time).
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
      # Change default installation prefix on Linux to /usr
      set(CMAKE_INSTALL_PREFIX /usr CACHE PATH "Install path prefix, prepended onto install directories." FORCE)
    endif()
  endif()

  # GNUInstallDirs defines CMAKE_INSTALL_LIBDIR / CMAKE_INSTALL_INCLUDEDIR.
  include(GNUInstallDirs)
  install(TARGETS ${ROCKSDB_STATIC_LIB} COMPONENT devel ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
  install(TARGETS ${ROCKSDB_SHARED_LIB} COMPONENT runtime DESTINATION ${CMAKE_INSTALL_LIBDIR})
  install(DIRECTORY include/rocksdb COMPONENT devel DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

option(WITH_TESTS "build with tests" ON)
if(WITH_TESTS)
set(TESTS
cache/cache_test.cc
cache/lru_cache_test.cc
Expand Down Expand Up @@ -846,14 +866,9 @@ foreach(sourcefile ${C_TEST_EXES})
add_test(NAME ${exename} COMMAND ${exename}${ARTIFACT_SUFFIX})
add_dependencies(check ${exename}${ARTIFACT_SUFFIX})
endforeach(sourcefile ${C_TEST_EXES})
add_subdirectory(tools)
endif()

# Installation and packaging for Linux
if(NOT WIN32)
install(TARGETS ${ROCKSDB_STATIC_LIB} COMPONENT devel ARCHIVE DESTINATION lib64)
install(TARGETS ${ROCKSDB_SHARED_LIB} COMPONENT runtime DESTINATION lib64)
install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/rocksdb/"
COMPONENT devel
DESTINATION include/rocksdb)
set(CMAKE_INSTALL_PREFIX /usr)
option(WITH_TOOLS "build with tools" ON)
if(WITH_TOOLS)
add_subdirectory(tools)
endif()
6 changes: 6 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
# Rocksdb Change Log
## Unreleased
### Public API Change
* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints.

### New Features
* Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators.
* Replace dynamic_cast<> (except in unit tests) so people can choose to build with RTTI off. With make, release mode is built with -fno-rtti by default, while debug mode is built with RTTI enabled. Users can override this by setting USE_RTTI=0 or 1.
* Universal compactions including the bottom level can be executed in a dedicated thread pool. This alleviates head-of-line blocking in the compaction queue, which causes write stalls, particularly in multi-instance use cases. Users can enable this feature via `Env::SetBackgroundThreads(N, Env::Priority::BOTTOM)`, where `N > 0`.
* Allow merge operator to be called even with a single merge operand during compactions, by appropriately overriding `MergeOperator::AllowSingleOperand`.
* Add `DB::VerifyChecksum()`, which verifies the checksums in all SST files in a running DB.

### Bug Fixes
* Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`.
* Fix incorrect dropping of deletions during intra-L0 compaction.
* Fix transient reappearance of keys covered by range deletions when memtable prefix bloom filter is enabled.

## 5.7.0 (07/13/2017)
### Public API Change
Expand Down
16 changes: 16 additions & 0 deletions cache/lru_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -234,19 +234,35 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
}

// Class-specific allocation for a single LRUCacheShard.
// Normally delegates to port::cacheline_aligned_alloc(); when
// __SANITIZE_ADDRESS__ is set, plain malloc() is used instead
// (presumably so the sanitizer sees an ordinary heap allocation -- confirm).
// Must stay paired with the matching operator delete.
void* LRUCacheShard::operator new(size_t size) {
#if !__SANITIZE_ADDRESS__
  return port::cacheline_aligned_alloc(size);
#else
  return malloc(size);
#endif
}

// Class-specific allocation for an array of LRUCacheShard objects.
// Normally delegates to port::cacheline_aligned_alloc(); when
// __SANITIZE_ADDRESS__ is set, plain malloc() is used instead.
// Must stay paired with the matching operator delete[].
void* LRUCacheShard::operator new[](size_t size) {
#if !__SANITIZE_ADDRESS__
  return port::cacheline_aligned_alloc(size);
#else
  return malloc(size);
#endif
}

// Releases storage obtained from LRUCacheShard::operator new.
// Mirrors the allocation side: port::cacheline_aligned_free() normally,
// free() when __SANITIZE_ADDRESS__ is set.
void LRUCacheShard::operator delete(void *memblock) {
#if !__SANITIZE_ADDRESS__
  port::cacheline_aligned_free(memblock);
#else
  free(memblock);
#endif
}

// Releases storage obtained from LRUCacheShard::operator new[].
// Mirrors the allocation side: port::cacheline_aligned_free() normally,
// free() when __SANITIZE_ADDRESS__ is set.
void LRUCacheShard::operator delete[](void* memblock) {
#if !__SANITIZE_ADDRESS__
  port::cacheline_aligned_free(memblock);
#else
  free(memblock);
#endif
}

void LRUCacheShard::SetCapacity(size_t capacity) {
Expand Down
2 changes: 2 additions & 0 deletions db/compaction_iteration_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ struct CompactionIterationStats {
int64_t num_record_drop_obsolete = 0;
int64_t num_record_drop_range_del = 0;
int64_t num_range_del_drop_obsolete = 0;
// Deletions obsoleted before bottom level due to file gap optimization.
int64_t num_optimized_del_drop_obsolete = 0;
uint64_t total_filter_time = 0;

// Input statistics
Expand Down
7 changes: 7 additions & 0 deletions db/compaction_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ void CompactionIterator::ResetRecordCounts() {
iter_stats_.num_record_drop_obsolete = 0;
iter_stats_.num_record_drop_range_del = 0;
iter_stats_.num_range_del_drop_obsolete = 0;
iter_stats_.num_optimized_del_drop_obsolete = 0;
}

void CompactionIterator::SeekToFirst() {
Expand Down Expand Up @@ -426,6 +427,9 @@ void CompactionIterator::NextFromInput() {
// Can compact out this SingleDelete.
++iter_stats_.num_record_drop_obsolete;
++iter_stats_.num_single_del_fallthru;
if (!bottommost_level_) {
++iter_stats_.num_optimized_del_drop_obsolete;
}
} else {
// Output SingleDelete
valid_ = true;
Expand Down Expand Up @@ -467,6 +471,9 @@ void CompactionIterator::NextFromInput() {
// Note: Dropping this Delete will not affect TransactionDB
// write-conflict checking since it is earlier than any snapshot.
++iter_stats_.num_record_drop_obsolete;
if (!bottommost_level_) {
++iter_stats_.num_optimized_del_drop_obsolete;
}
input_->Next();
} else if (ikey_.type == kTypeMerge) {
if (!merge_helper_->HasOperator()) {
Expand Down
105 changes: 105 additions & 0 deletions db/compaction_iterator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,111 @@ TEST_F(CompactionIteratorTest, ShuttingDownInMerge) {
EXPECT_EQ(2, filter.last_seen.load());
}

// Verifies that a merge operator whose AllowSingleOperand() returns true is
// invoked by the compaction iterator even when a key has only one merge
// operand, and that the compaction filter sees exactly the expected keys.
TEST_F(CompactionIteratorTest, SingleMergeOperand) {
  // Compaction filter that keeps everything but checks which keys (and, for
  // "a", which value) it is asked about.
  class Filter : public CompactionFilter {
    virtual Decision FilterV2(int level, const Slice& key, ValueType t,
                              const Slice& existing_value,
                              std::string* new_value,
                              std::string* skip_until) const override {
      std::string k = key.ToString();
      std::string v = existing_value.ToString();

      // See InitIterators() call below for the sequence of keys and their
      // filtering decisions. Here we closely assert that compaction filter is
      // called with the expected keys and only them, and with the right values.
      if (k == "a") {
        EXPECT_EQ(ValueType::kMergeOperand, t);
        EXPECT_EQ("av1", v);
        return Decision::kKeep;
      } else if (k == "b") {
        EXPECT_EQ(ValueType::kMergeOperand, t);
        return Decision::kKeep;
      } else if (k == "c") {
        return Decision::kKeep;
      }

      // Any other key means the filter was called unexpectedly.
      ADD_FAILURE();
      return Decision::kKeep;
    }

    const char* Name() const override {
      return "CompactionIteratorTest.SingleMergeOperand::Filter";
    }
  };

  // Merge operator that concatenates operands and opts in to being called
  // with a single operand.
  class SingleMergeOp : public MergeOperator {
   public:
    bool FullMergeV2(const MergeOperationInput& merge_in,
                     MergeOperationOutput* merge_out) const override {
      // See InitIterators() call below for why "c" is the only key for which
      // FullMergeV2 should be called.
      EXPECT_EQ("c", merge_in.key.ToString());

      std::string temp_value;
      if (merge_in.existing_value != nullptr) {
        temp_value = merge_in.existing_value->ToString();
      }

      for (auto& operand : merge_in.operand_list) {
        temp_value.append(operand.ToString());
      }
      merge_out->new_value = temp_value;

      return true;
    }

    bool PartialMergeMulti(const Slice& key,
                           const std::deque<Slice>& operand_list,
                           std::string* new_value,
                           Logger* logger) const override {
      std::string string_key = key.ToString();
      EXPECT_TRUE(string_key == "a" || string_key == "b");

      // Unsigned literals avoid a signed/unsigned comparison against size().
      if (string_key == "a") {
        EXPECT_EQ(1U, operand_list.size());
      } else if (string_key == "b") {
        EXPECT_EQ(2U, operand_list.size());
      }

      std::string temp_value;
      for (auto& operand : operand_list) {
        temp_value.append(operand.ToString());
      }
      swap(temp_value, *new_value);

      return true;
    }

    const char* Name() const override {
      return "CompactionIteratorTest SingleMergeOp";
    }

    bool AllowSingleOperand() const override { return true; }
  };

  SingleMergeOp merge_op;
  Filter filter;
  InitIterators(
      // a should invoke PartialMergeMulti with a single merge operand.
      {test::KeyStr("a", 50, kTypeMerge),
       // b should invoke PartialMergeMulti with two operands.
       test::KeyStr("b", 70, kTypeMerge), test::KeyStr("b", 60, kTypeMerge),
       // c should invoke FullMerge due to kTypeValue at the beginning.
       test::KeyStr("c", 90, kTypeMerge), test::KeyStr("c", 80, kTypeValue)},
      {"av1", "bv2", "bv1", "cv2", "cv1"}, {}, {}, kMaxSequenceNumber,
      &merge_op, &filter);

  c_iter_->SeekToFirst();
  ASSERT_TRUE(c_iter_->Valid());
  ASSERT_EQ(test::KeyStr("a", 50, kTypeMerge), c_iter_->key().ToString());
  ASSERT_EQ("av1", c_iter_->value().ToString());
  c_iter_->Next();
  ASSERT_TRUE(c_iter_->Valid());
  ASSERT_EQ("bv1bv2", c_iter_->value().ToString());
  c_iter_->Next();
  // Check validity before reading value(), as for the previous entries.
  ASSERT_TRUE(c_iter_->Valid());
  ASSERT_EQ("cv1cv2", c_iter_->value().ToString());
}

} // namespace rocksdb

int main(int argc, char** argv) {
Expand Down
4 changes: 4 additions & 0 deletions db/compaction_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,10 @@ void CompactionJob::RecordDroppedKeys(
RecordTick(stats_, COMPACTION_RANGE_DEL_DROP_OBSOLETE,
c_iter_stats.num_range_del_drop_obsolete);
}
if (c_iter_stats.num_optimized_del_drop_obsolete > 0) {
RecordTick(stats_, COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
c_iter_stats.num_optimized_del_drop_obsolete);
}
}

Status CompactionJob::FinishCompactionOutputFile(
Expand Down
1 change: 1 addition & 0 deletions db/compaction_picker_universal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ Compaction* UniversalCompactionPicker::PickCompaction(
c->inputs(0)->size());

RegisterCompaction(c);
vstorage->ComputeCompactionScore(ioptions_, mutable_cf_options);

TEST_SYNC_POINT_CALLBACK("UniversalCompactionPicker::PickCompaction:Return",
c);
Expand Down
7 changes: 6 additions & 1 deletion db/db_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,6 @@ TEST_F(DBBasicTest, FLUSH) {
WriteOptions writeOpt = WriteOptions();
writeOpt.disableWAL = true;
SetPerfLevel(kEnableTime);
;
ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
// this will now also flush the last 2 writes
ASSERT_OK(Flush(1));
Expand All @@ -369,6 +368,7 @@ TEST_F(DBBasicTest, FLUSH) {
get_perf_context()->Reset();
Get(1, "foo");
ASSERT_TRUE((int)get_perf_context()->get_from_output_files_time > 0);
ASSERT_EQ(2, (int)get_perf_context()->get_read_bytes);

ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
ASSERT_EQ("v1", Get(1, "foo"));
Expand Down Expand Up @@ -725,6 +725,7 @@ TEST_F(DBBasicTest, FlushOneColumnFamily) {
TEST_F(DBBasicTest, MultiGetSimple) {
do {
CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
SetPerfLevel(kEnableCount);
ASSERT_OK(Put(1, "k1", "v1"));
ASSERT_OK(Put(1, "k2", "v2"));
ASSERT_OK(Put(1, "k3", "v3"));
Expand All @@ -738,19 +739,23 @@ TEST_F(DBBasicTest, MultiGetSimple) {
std::vector<std::string> values(20, "Temporary data to be overwritten");
std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);

get_perf_context()->Reset();
std::vector<Status> s = db_->MultiGet(ReadOptions(), cfs, keys, &values);
ASSERT_EQ(values.size(), keys.size());
ASSERT_EQ(values[0], "v1");
ASSERT_EQ(values[1], "v2");
ASSERT_EQ(values[2], "v3");
ASSERT_EQ(values[4], "v5");
// four kv pairs * two bytes per value
ASSERT_EQ(8, (int)get_perf_context()->multiget_read_bytes);

ASSERT_OK(s[0]);
ASSERT_OK(s[1]);
ASSERT_OK(s[2]);
ASSERT_TRUE(s[3].IsNotFound());
ASSERT_OK(s[4]);
ASSERT_TRUE(s[5].IsNotFound());
SetPerfLevel(kDisable);
} while (ChangeCompactOptions());
}

Expand Down
40 changes: 40 additions & 0 deletions db/db_compaction_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2684,6 +2684,46 @@ TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) {
ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound());
}

// Checks that tombstones compacted into a non-bottommost level are dropped
// when no lower-level file's key-range covers them, and that the
// COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE ticker counts those drops.
TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) {
  // Deletions can be dropped when compacted to non-last level if they fall
  // outside the lower-level files' key-ranges.
  const int kNumL0Files = 4;
  Options options = CurrentOptions();
  options.level0_file_num_compaction_trigger = kNumL0Files;
  options.statistics = rocksdb::CreateDBStatistics();
  DestroyAndReopen(options);

  // put key 1 and 3 in separate L1, L2 files.
  // So key 0, 2, and 4+ fall outside these levels' key-ranges.
  for (int level = 2; level >= 1; --level) {
    for (int i = 0; i < 2; ++i) {
      // Fail fast if the setup writes do not succeed; otherwise the ticker
      // assertions below would fail for an unrelated reason.
      ASSERT_OK(Put(Key(2 * i + 1), "val"));
      ASSERT_OK(Flush());
    }
    MoveFilesToLevel(level);
    ASSERT_EQ(2, NumTableFilesAtLevel(level));
  }

  // Delete keys in range [1, 4]. These L0 files will be compacted with L1:
  // - Tombstones for keys 2 and 4 can be dropped early.
  // - Tombstones for keys 1 and 3 must be kept due to L2 files' key-ranges.
  for (int i = 0; i < kNumL0Files; ++i) {
    ASSERT_OK(Put(Key(0), "val"));  // sentinel to prevent trivial move
    ASSERT_OK(Delete(Key(i + 1)));
    ASSERT_OK(Flush());
  }
  dbfull()->TEST_WaitForCompact();

  // Every deleted key must stay invisible, whether or not its tombstone
  // survived the compaction.
  for (int i = 0; i < kNumL0Files; ++i) {
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), Key(i + 1), &value).IsNotFound());
  }
  ASSERT_EQ(2, options.statistics->getTickerCount(
                   COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE));
  ASSERT_EQ(2,
            options.statistics->getTickerCount(COMPACTION_KEY_DROP_OBSOLETE));
}

INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
::testing::Values(std::make_tuple(1, true),
std::make_tuple(1, false),
Expand Down
4 changes: 3 additions & 1 deletion db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
size_t size = pinnable_val->size();
RecordTick(stats_, BYTES_READ, size);
MeasureTime(stats_, BYTES_PER_READ, size);
PERF_COUNTER_ADD(get_read_bytes, size);
}
return s;
}
Expand Down Expand Up @@ -1117,6 +1118,7 @@ std::vector<Status> DBImpl::MultiGet(
RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys);
RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read);
MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read);
PERF_COUNTER_ADD(multiget_read_bytes, bytes_read);
PERF_TIMER_STOP(get_post_process_time);

return stat_list;
Expand Down Expand Up @@ -2768,7 +2770,7 @@ Status DBImpl::VerifyChecksum() {
const auto& fd = vstorage->LevelFilesBrief(i).files[j].fd;
std::string fname = TableFileName(immutable_db_options_.db_paths,
fd.GetNumber(), fd.GetPathId());
s = rocksdb::VerifySstFileChecksum(options, env_options, fname);
s = rocksdb::VerifySstFileChecksum(options, env_options, fname);
}
}
if (!s.ok()) {
Expand Down
Loading

0 comments on commit 15c4017

Please sign in to comment.