Skip to content

Commit

Permalink
Fault injection in db_stress (facebook#6538)
Browse files Browse the repository at this point in the history
Summary:
This PR implements a fault injection mechanism for injecting errors in reads in db_stress. The FaultInjectionTestFS is used for this purpose. A thread-local structure is used to track the errors, so that each db_stress thread can independently enable/disable error injection and verify observed errors against expected errors. This is initially enabled only for Get and MultiGet, but can be extended to iterators as well once it's proven stable.
Pull Request resolved: facebook#6538

Test Plan:
crash_test
make check

Reviewed By: riversand963

Differential Revision: D20714347

Pulled By: anand1976

fbshipit-source-id: d7598321d4a2d72bda0ced57411a337a91d87dc7
  • Loading branch information
anand76 authored and facebook-github-bot committed Apr 11, 2020
1 parent 0c05624 commit 5c19a44
Show file tree
Hide file tree
Showing 19 changed files with 469 additions and 23 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,12 @@ BENCHTOOLOBJECTS = $(BENCH_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)

ANALYZETOOLOBJECTS = $(ANALYZER_LIB_SOURCES:.cc=.o)

# Object list for the stress tool. When DEBUG_LEVEL is not 0 the list
# additionally contains $(TESTHARNESS).
# NOTE(review): presumably required by the debug-only fault injection code in
# db_stress -- confirm.
ifeq ($(DEBUG_LEVEL),0)
STRESSTOOLOBJECTS = $(STRESS_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)
else
STRESSTOOLOBJECTS = $(STRESS_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) \
$(TESTHARNESS)
endif

EXPOBJECTS = $(LIBOBJECTS) $(TESTUTIL)

Expand Down
5 changes: 4 additions & 1 deletion TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ is_opt_mode = build_mode.startswith("opt")
# -DNDEBUG is added by default in opt mode in fbcode. Adding it twice does
# no harm and guards against forgetting to add it.
ROCKSDB_COMPILER_FLAGS += (["-DNDEBUG"] if is_opt_mode else [])
# Dependencies of the stress library target: the core library always, plus
# the test library in every build mode other than opt.
ROCKSDB_STRESS_DEPS = [":rocksdb_lib"] + (
    [] if is_opt_mode else [":rocksdb_test_lib"]
)

sanitizer = read_config("fbcode", "sanitizer")

Expand Down Expand Up @@ -436,7 +439,7 @@ cpp_library(
os_deps = ROCKSDB_OS_DEPS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
deps = [":rocksdb_lib"],
deps = ROCKSDB_STRESS_DEPS,
external_deps = ROCKSDB_EXTERNAL_DEPS,
)

Expand Down
4 changes: 4 additions & 0 deletions build_tools/build_detect_platform
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ JAVAC_ARGS="-source 7"
if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
# Cross-compiling; do not try any compilation tests.
# Also don't need any compilation tests if compiling on fbcode
if [ "$FBCODE_BUILD" = "true" ]; then
# Enable backtrace on fbcode since the necessary libraries are present
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE"
fi
true
else
if ! test $ROCKSDB_DISABLE_FALLOCATE; then
Expand Down
5 changes: 5 additions & 0 deletions db_stress_tool/db_stress_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
#include <cmath>

ROCKSDB_NAMESPACE::DbStressEnvWrapper* db_stress_env = nullptr;
#ifndef NDEBUG
// If non-null, injects read error at a rate specified by the
// read_fault_one_in flag
std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
#endif // NDEBUG
enum ROCKSDB_NAMESPACE::CompressionType compression_type_e =
ROCKSDB_NAMESPACE::kSnappyCompression;
enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e =
Expand Down
10 changes: 6 additions & 4 deletions db_stress_tool/db_stress_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
#include "rocksdb/write_batch.h"
#ifndef NDEBUG
#include "test_util/fault_injection_test_fs.h"
#endif
#include "util/coding.h"
#include "util/compression.h"
#include "util/crc32c.h"
Expand All @@ -66,10 +69,6 @@
#include "util/random.h"
#include "util/string_util.h"
#include "utilities/blob_db/blob_db.h"
// SyncPoint is not supported in Released Windows Mode.
#if !(defined NDEBUG) || !defined(OS_WIN)
#include "test_util/sync_point.h"
#endif // !(defined NDEBUG) || !defined(OS_WIN)
#include "test_util/testutil.h"

#include "utilities/merge_operators.h"
Expand Down Expand Up @@ -237,6 +236,9 @@ const int kValueMaxLen = 100;

// wrapped posix or hdfs environment
extern ROCKSDB_NAMESPACE::DbStressEnvWrapper* db_stress_env;
#ifndef NDEBUG
extern std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
#endif

extern enum ROCKSDB_NAMESPACE::CompressionType compression_type_e;
extern enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e;
Expand Down
3 changes: 3 additions & 0 deletions db_stress_tool/db_stress_gflags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -671,4 +671,7 @@ DEFINE_int32(continuous_verification_interval, 1000,
DEFINE_int32(approximate_size_one_in, 64,
"If non-zero, DB::GetApproximateSizes() will be called against"
" random key ranges.");

// Controls read fault injection; a non-zero value turns it on. The default
// (1000) is non-zero, so injection is requested unless the flag is set to 0.
// NOTE(review): presumably an error is injected into roughly one in N reads;
// confirm against FaultInjectionTestFS.
// NOTE(review): the code that acts on this flag appears to be compiled only
// when NDEBUG is not defined, so it likely has no effect in release builds;
// confirm.
DEFINE_int32(read_fault_one_in, 1000,
"On non-zero, enables fault injection on read");
#endif // GFLAGS
9 changes: 9 additions & 0 deletions db_stress_tool/db_stress_shared_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,14 @@
namespace ROCKSDB_NAMESPACE {
const uint32_t SharedState::UNKNOWN_SENTINEL = 0xfffffffe;
const uint32_t SharedState::DELETION_SENTINEL = 0xffffffff;
// Out-of-class definition of the static member SharedState::filter_read_error.
// The storage class is selected by the same preprocessor conditions as the
// in-class declaration: __thread on Solaris, thread_local on other platforms
// with thread-local support, and a plain static bool when
// ROCKSDB_SUPPORT_THREAD_LOCAL is not defined.
#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
#if defined(OS_SOLARIS)
__thread bool SharedState::filter_read_error;
#else
thread_local bool SharedState::filter_read_error;
#endif // OS_SOLARIS
#else
bool SharedState::filter_read_error;
#endif // ROCKSDB_SUPPORT_THREAD_LOCAL
} // namespace ROCKSDB_NAMESPACE
#endif // GFLAGS
39 changes: 38 additions & 1 deletion db_stress_tool/db_stress_shared_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#pragma once

#include "db_stress_tool/db_stress_stat.h"
// SyncPoint is not supported in Released Windows Mode.
#if !(defined NDEBUG) || !defined(OS_WIN)
#include "test_util/sync_point.h"
#endif // !(defined NDEBUG) || !defined(OS_WIN)
#include "util/gflags_compat.h"

DECLARE_uint64(seed);
Expand All @@ -24,6 +28,7 @@ DECLARE_int32(clear_column_family_one_in);
DECLARE_bool(test_batches_snapshots);
DECLARE_int32(compaction_thread_pool_adjust_interval);
DECLARE_int32(continuous_verification_interval);
DECLARE_int32(read_fault_one_in);

namespace ROCKSDB_NAMESPACE {
class StressTest;
Expand All @@ -37,6 +42,20 @@ class SharedState {
// indicates a key should definitely be deleted
static const uint32_t DELETION_SENTINEL;

// Errors when reading filter blocks are ignored, so we use a thread
// local variable updated via sync points to keep track of errors injected
// while reading filter blocks in order to ignore the Get/MultiGet result
// for those calls
#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
#if defined(OS_SOLARIS)
static __thread bool filter_read_error;
#else
static thread_local bool filter_read_error;
#endif // OS_SOLARIS
#else
static bool filter_read_error;
#endif // ROCKSDB_SUPPORT_THREAD_LOCAL

SharedState(Env* env, StressTest* stress_test)
: cv_(&mu_),
seed_(static_cast<uint32_t>(FLAGS_seed)),
Expand Down Expand Up @@ -171,9 +190,23 @@ class SharedState {
++num_bg_threads_;
fprintf(stdout, "Starting continuous_verification_thread\n");
}
#ifndef NDEBUG
if (FLAGS_read_fault_one_in) {
SyncPoint::GetInstance()->SetCallBack("FilterReadError",
FilterReadErrorCallback);
SyncPoint::GetInstance()->EnableProcessing();
}
#endif // NDEBUG
}

~SharedState() {}
// Tears down the sync point hooks that the constructor installs when read
// fault injection is requested. Only compiled in when NDEBUG is not defined.
~SharedState() {
#ifndef NDEBUG
  if (FLAGS_read_fault_one_in != 0) {
    auto* const sync_point = SyncPoint::GetInstance();
    // Drop the registered callbacks first, then stop sync point processing.
    sync_point->ClearAllCallBacks();
    sync_point->DisableProcessing();
  }
#endif  // NDEBUG
}

port::Mutex* GetMutex() { return &mu_; }

Expand Down Expand Up @@ -329,6 +362,10 @@ class SharedState {
}

private:
// Sync point callback registered under the name "FilterReadError". It sets
// filter_read_error so that the caller can tell an error was injected while
// reading a filter block and ignore the Get/MultiGet result for that call.
// The callback argument is unused.
static void FilterReadErrorCallback(void*) {
filter_read_error = true;
}

port::Mutex mu_;
port::CondVar cv_;
const uint32_t seed_;
Expand Down
7 changes: 7 additions & 0 deletions db_stress_tool/db_stress_stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class Stats {
long range_deletions_;
long covered_by_range_deletions_;
long errors_;
long verified_errors_;
long num_compact_files_succeed_;
long num_compact_files_failed_;
int next_report_;
Expand All @@ -67,6 +68,7 @@ class Stats {
range_deletions_ = 0;
covered_by_range_deletions_ = 0;
errors_ = 0;
verified_errors_ = 0;
bytes_ = 0;
seconds_ = 0;
num_compact_files_succeed_ = 0;
Expand All @@ -90,6 +92,7 @@ class Stats {
range_deletions_ += other.range_deletions_;
// Accumulate like every other counter here. Plain assignment would throw
// away the total merged so far and keep only `other`'s value.
covered_by_range_deletions_ += other.covered_by_range_deletions_;
errors_ += other.errors_;
verified_errors_ += other.verified_errors_;
bytes_ += other.bytes_;
seconds_ += other.seconds_;
num_compact_files_succeed_ += other.num_compact_files_succeed_;
Expand Down Expand Up @@ -163,6 +166,8 @@ class Stats {

void AddErrors(long n) { errors_ += n; }

void AddVerifiedErrors(long n) { verified_errors_ += n; }

void AddNumCompactFilesSucceed(long n) { num_compact_files_succeed_ += n; }

void AddNumCompactFilesFailed(long n) { num_compact_files_failed_ += n; }
Expand Down Expand Up @@ -199,6 +204,8 @@ class Stats {
covered_by_range_deletions_);

fprintf(stdout, "%-12s: Got errors %ld times\n", "", errors_);
fprintf(stdout, "%-12s: Got expected errors %ld times\n", "",
verified_errors_);
fprintf(stdout, "%-12s: %ld CompactFiles() succeed\n", "",
num_compact_files_succeed_);
fprintf(stdout, "%-12s: %ld CompactFiles() did not succeed\n", "",
Expand Down
7 changes: 7 additions & 0 deletions db_stress_tool/db_stress_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,12 @@ void StressTest::OperateDb(ThreadState* thread) {
const int delRangeBound = delBound + static_cast<int>(FLAGS_delrangepercent);
const uint64_t ops_per_open = FLAGS_ops_per_thread / (FLAGS_reopen + 1);

#ifndef NDEBUG
if (FLAGS_read_fault_one_in) {
fault_fs_guard->SetThreadLocalReadErrorContext(thread->shared->GetSeed(),
FLAGS_read_fault_one_in);
}
#endif // NDEBUG
thread->stats.Start();
for (int open_cnt = 0; open_cnt <= FLAGS_reopen; ++open_cnt) {
if (thread->shared->HasVerificationFailedYet() ||
Expand Down Expand Up @@ -1721,6 +1727,7 @@ void StressTest::PrintEnv() const {
FLAGS_max_write_batch_group_size_bytes);
fprintf(stdout, "Use dynamic level : %d\n",
static_cast<int>(FLAGS_level_compaction_dynamic_level_bytes));
fprintf(stdout, "Read fault one in : %d\n", FLAGS_read_fault_one_in);

fprintf(stdout, "------------------------------------------------\n");
}
Expand Down
15 changes: 15 additions & 0 deletions db_stress_tool/db_stress_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,15 @@
#ifdef GFLAGS
#include "db_stress_tool/db_stress_common.h"
#include "db_stress_tool/db_stress_driver.h"
#ifndef NDEBUG
#include "test_util/fault_injection_test_fs.h"
#endif

namespace ROCKSDB_NAMESPACE {
namespace {
static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
static std::shared_ptr<ROCKSDB_NAMESPACE::DbStressEnvWrapper> env_wrapper_guard;
static std::shared_ptr<CompositeEnvWrapper> fault_env_guard;
} // namespace

KeyGenContext key_gen_ctx;
Expand Down Expand Up @@ -69,6 +73,17 @@ int db_stress_tool(int argc, char** argv) {
} else {
raw_env = Env::Default();
}
#ifndef NDEBUG
  // When read fault injection is requested, wrap the file system of the
  // selected Env in a FaultInjectionTestFS and route all further Env access
  // through a CompositeEnvWrapper built on top of it.
  if (FLAGS_read_fault_one_in) {
    // Build the shared_ptr directly instead of a raw owning `new` followed
    // by reset().
    fault_fs_guard =
        std::make_shared<FaultInjectionTestFS>(raw_env->GetFileSystem());
    fault_fs_guard->SetFilesystemDirectWritable(true);
    fault_env_guard =
        std::make_shared<CompositeEnvWrapper>(raw_env, fault_fs_guard);
    // raw_env is non-owning; fault_env_guard keeps the wrapper alive.
    raw_env = fault_env_guard.get();
  }
#endif
env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
db_stress_env = env_wrapper_guard.get();

Expand Down
Loading

0 comments on commit 5c19a44

Please sign in to comment.