Skip to content

Commit 15325bf

Browse files
committed
First version of rocksdb_dump and rocksdb_undump.
Summary: Hack up rocksdb_dump and rocksdb_undump utilities to get this task rolling/promote discussion. Test Plan: Dump/undump databases recursively to see if nothing is lost. Reviewers: sdong, yhchiang, rven, anthony, kradhakrishnan, igor Reviewed By: igor Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D37269
1 parent 04251e1 commit 15325bf

7 files changed

+321
-1
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ package/
3333
.phutil_module_cache
3434
unity
3535
tags
36+
rocksdb_dump
37+
rocksdb_undump
3638

3739
java/out
3840
java/target

DUMP_FORMAT.md

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
## RocksDB dump format
2+
3+
The version 1 RocksDB dump format is fairly simple:
4+
5+
1) The dump starts with the magic 8 byte identifier "ROCKDUMP"
6+
7+
2) The magic is followed by an 8-byte big-endian version number, which is 0x0000000000000001 (i.e. version 1).
8+
9+
3) Next are arbitrarily sized chunks of bytes, each prefixed by a 4-byte little-endian number giving the size of that chunk.
10+
11+
4) The first chunk is special: it is a JSON string describing how the dump was created. It is an empty object (`{}`) when the dump was created with `--anonymous`; otherwise it contains the following keys:
12+
* database-path: The path of the database this dump was created from.
13+
* hostname: The hostname of the machine where the dump was created.
14+
* creation-time: Unix time, in seconds since the epoch, at which this dump was created.
15+
16+
5) Following the info chunk, the remaining chunks are paired into key/value pairs: each key chunk is immediately followed by its value chunk.

Makefile

+11-1
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,9 @@ TOOLS = \
300300
db_sanity_test \
301301
db_stress \
302302
ldb \
303-
db_repl_stress
303+
db_repl_stress \
304+
rocksdb_dump \
305+
rocksdb_undump
304306

305307
BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench
306308

@@ -516,6 +518,8 @@ check: all
516518
echo "===== Running $$t"; ./$$t || exit 1; done; \
517519
fi
518520
rm -rf $(TMPD)
521+
python tools/ldb_test.py
522+
sh tools/rocksdb_dump_test.sh
519523

520524
check_some: $(SUBSET) ldb_tests
521525
for t in $(SUBSET); do echo "===== Running $$t"; ./$$t || exit 1; done
@@ -795,6 +799,12 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
795799
geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
796800
$(AM_LINK)
797801

802+
rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS)
803+
$(AM_LINK)
804+
805+
rocksdb_undump: tools/dump/rocksdb_undump.o $(LIBOBJECTS)
806+
$(AM_LINK)
807+
798808
cuckoo_table_builder_test: table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
799809
$(AM_LINK)
800810

tools/dump/rocksdb_dump.cc

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
2+
// This source code is licensed under the BSD-style license found in the
3+
// LICENSE file in the root directory of this source tree. An additional grant
4+
// of patent rights can be found in the PATENTS file in the same directory.
5+
6+
#ifndef GFLAGS
7+
#include <cstdio>
8+
// Stub entry point compiled when GFLAGS is not defined: the real tool needs
// gflags for flag parsing, so just tell the user and report failure.
int main() {
  std::fputs("Please install gflags to run rocksdb tools\n", stderr);
  return 1;
}
12+
#else
13+
14+
#include <gflags/gflags.h>
15+
#include <iostream>
16+
17+
#include "rocksdb/db.h"
18+
#include "rocksdb/env.h"
19+
#include "util/coding.h"
20+
21+
// --anonymous: when set, main() writes "{}" as the dump's info chunk instead
// of the JSON object carrying database path, hostname and creation time.
DEFINE_bool(anonymous, false, "Output an empty information blob.");
22+
23+
// Prints the command-line synopsis for this tool to stdout.
// `name` is the program name to show (callers pass argv[0]).
void usage(const char* name) {
  std::cout << "usage: " << name;
  std::cout << " [--anonymous] <db> <dumpfile>" << std::endl;
}
27+
28+
int main(int argc, char** argv) {
29+
rocksdb::DB* dbptr;
30+
rocksdb::Options options;
31+
rocksdb::Status status;
32+
std::unique_ptr<rocksdb::WritableFile> dumpfile;
33+
char hostname[1024];
34+
int64_t timesec;
35+
std::string abspath;
36+
char json[4096];
37+
38+
GFLAGS::ParseCommandLineFlags(&argc, &argv, true);
39+
40+
static const char* magicstr = "ROCKDUMP";
41+
static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1};
42+
43+
if (argc != 3) {
44+
usage(argv[0]);
45+
exit(1);
46+
}
47+
48+
rocksdb::Env* env = rocksdb::Env::Default();
49+
50+
// Open the database
51+
options.create_if_missing = false;
52+
status = rocksdb::DB::OpenForReadOnly(options, argv[1], &dbptr);
53+
if (!status.ok()) {
54+
std::cerr << "Unable to open database '" << argv[1]
55+
<< "' for reading: " << status.ToString() << std::endl;
56+
exit(1);
57+
}
58+
59+
const std::unique_ptr<rocksdb::DB> db(dbptr);
60+
61+
status = env->NewWritableFile(argv[2], &dumpfile, rocksdb::EnvOptions());
62+
if (!status.ok()) {
63+
std::cerr << "Unable to open dump file '" << argv[2]
64+
<< "' for writing: " << status.ToString() << std::endl;
65+
exit(1);
66+
}
67+
68+
rocksdb::Slice magicslice(magicstr, 8);
69+
status = dumpfile->Append(magicslice);
70+
if (!status.ok()) {
71+
std::cerr << "Append failed: " << status.ToString() << std::endl;
72+
exit(1);
73+
}
74+
75+
rocksdb::Slice versionslice(versionstr, 8);
76+
status = dumpfile->Append(versionslice);
77+
if (!status.ok()) {
78+
std::cerr << "Append failed: " << status.ToString() << std::endl;
79+
exit(1);
80+
}
81+
82+
if (FLAGS_anonymous) {
83+
snprintf(json, sizeof(json), "{}");
84+
} else {
85+
status = env->GetHostName(hostname, sizeof(hostname));
86+
status = env->GetCurrentTime(&timesec);
87+
status = env->GetAbsolutePath(argv[1], &abspath);
88+
snprintf(json, sizeof(json),
89+
"{ \"database-path\": \"%s\", \"hostname\": \"%s\", "
90+
"\"creation-time\": %ld }",
91+
abspath.c_str(), hostname, timesec);
92+
}
93+
94+
rocksdb::Slice infoslice(json, strlen(json));
95+
char infosize[4];
96+
rocksdb::EncodeFixed32(infosize, (uint32_t)infoslice.size());
97+
rocksdb::Slice infosizeslice(infosize, 4);
98+
status = dumpfile->Append(infosizeslice);
99+
if (!status.ok()) {
100+
std::cerr << "Append failed: " << status.ToString() << std::endl;
101+
exit(1);
102+
}
103+
status = dumpfile->Append(infoslice);
104+
if (!status.ok()) {
105+
std::cerr << "Append failed: " << status.ToString() << std::endl;
106+
exit(1);
107+
}
108+
109+
const std::unique_ptr<rocksdb::Iterator> it(
110+
db->NewIterator(rocksdb::ReadOptions()));
111+
for (it->SeekToFirst(); it->Valid(); it->Next()) {
112+
char keysize[4];
113+
rocksdb::EncodeFixed32(keysize, (uint32_t)it->key().size());
114+
rocksdb::Slice keysizeslice(keysize, 4);
115+
status = dumpfile->Append(keysizeslice);
116+
if (!status.ok()) {
117+
std::cerr << "Append failed: " << status.ToString() << std::endl;
118+
exit(1);
119+
}
120+
status = dumpfile->Append(it->key());
121+
if (!status.ok()) {
122+
std::cerr << "Append failed: " << status.ToString() << std::endl;
123+
exit(1);
124+
}
125+
126+
char valsize[4];
127+
rocksdb::EncodeFixed32(valsize, (uint32_t)it->value().size());
128+
rocksdb::Slice valsizeslice(valsize, 4);
129+
status = dumpfile->Append(valsizeslice);
130+
if (!status.ok()) {
131+
std::cerr << "Append failed: " << status.ToString() << std::endl;
132+
exit(1);
133+
}
134+
status = dumpfile->Append(it->value());
135+
if (!status.ok()) {
136+
std::cerr << "Append failed: " << status.ToString() << std::endl;
137+
exit(1);
138+
}
139+
}
140+
if (!it->status().ok()) {
141+
std::cerr << "Database iteration failed: " << status.ToString()
142+
<< std::endl;
143+
exit(1);
144+
}
145+
146+
return 0;
147+
}
148+
149+
#endif // GFLAGS

tools/dump/rocksdb_undump.cc

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
2+
// This source code is licensed under the BSD-style license found in the
3+
// LICENSE file in the root directory of this source tree. An additional grant
4+
// of patent rights can be found in the PATENTS file in the same directory.
5+
6+
#include <cstring>
7+
#include <iostream>
8+
9+
#include "rocksdb/db.h"
10+
#include "rocksdb/env.h"
11+
#include "util/coding.h"
12+
13+
// Prints the command-line synopsis for this tool to stdout.
// `name` is the program name to show (callers pass argv[0]).
void usage(const char *name) {
  std::cout << "usage: " << name;
  std::cout << " <dumpfile> <rocksdb>" << std::endl;
}
16+
17+
int main(int argc, char **argv) {
18+
rocksdb::DB *dbptr;
19+
rocksdb::Options options;
20+
rocksdb::Status status;
21+
rocksdb::Env *env;
22+
std::unique_ptr<rocksdb::SequentialFile> dumpfile;
23+
rocksdb::Slice slice;
24+
char scratch8[8];
25+
26+
static const char *magicstr = "ROCKDUMP";
27+
static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1};
28+
29+
if (argc != 3) {
30+
usage(argv[0]);
31+
exit(1);
32+
}
33+
34+
env = rocksdb::Env::Default();
35+
36+
status = env->NewSequentialFile(argv[1], &dumpfile, rocksdb::EnvOptions());
37+
if (!status.ok()) {
38+
std::cerr << "Unable to open dump file '" << argv[1]
39+
<< "' for reading: " << status.ToString() << std::endl;
40+
exit(1);
41+
}
42+
43+
status = dumpfile->Read(8, &slice, scratch8);
44+
if (!status.ok() || slice.size() != 8 ||
45+
memcmp(slice.data(), magicstr, 8) != 0) {
46+
std::cerr << "File '" << argv[1] << "' is not a recognizable dump file."
47+
<< std::endl;
48+
exit(1);
49+
}
50+
51+
status = dumpfile->Read(8, &slice, scratch8);
52+
if (!status.ok() || slice.size() != 8 ||
53+
memcmp(slice.data(), versionstr, 8) != 0) {
54+
std::cerr << "File '" << argv[1] << "' version not recognized."
55+
<< std::endl;
56+
exit(1);
57+
}
58+
59+
status = dumpfile->Read(4, &slice, scratch8);
60+
if (!status.ok() || slice.size() != 4) {
61+
std::cerr << "Unable to read info blob size." << std::endl;
62+
exit(1);
63+
}
64+
uint32_t infosize = rocksdb::DecodeFixed32(slice.data());
65+
status = dumpfile->Skip(infosize);
66+
if (!status.ok()) {
67+
std::cerr << "Unable to skip info blob: " << status.ToString() << std::endl;
68+
exit(1);
69+
}
70+
71+
options.create_if_missing = true;
72+
status = rocksdb::DB::Open(options, argv[2], &dbptr);
73+
if (!status.ok()) {
74+
std::cerr << "Unable to open database '" << argv[2]
75+
<< "' for writing: " << status.ToString() << std::endl;
76+
exit(1);
77+
}
78+
79+
const std::unique_ptr<rocksdb::DB> db(dbptr);
80+
81+
uint32_t last_keysize = 64;
82+
size_t last_valsize = 1 << 20;
83+
std::unique_ptr<char[]> keyscratch(new char[last_keysize]);
84+
std::unique_ptr<char[]> valscratch(new char[last_valsize]);
85+
86+
while (1) {
87+
uint32_t keysize, valsize;
88+
rocksdb::Slice keyslice;
89+
rocksdb::Slice valslice;
90+
91+
status = dumpfile->Read(4, &slice, scratch8);
92+
if (!status.ok() || slice.size() != 4) break;
93+
keysize = rocksdb::DecodeFixed32(slice.data());
94+
if (keysize > last_keysize) {
95+
while (keysize > last_keysize) last_keysize *= 2;
96+
keyscratch = std::unique_ptr<char[]>(new char[last_keysize]);
97+
}
98+
99+
status = dumpfile->Read(keysize, &keyslice, keyscratch.get());
100+
if (!status.ok() || keyslice.size() != keysize) {
101+
std::cerr << "Key read failure: "
102+
<< (status.ok() ? "insufficient data" : status.ToString())
103+
<< std::endl;
104+
exit(1);
105+
}
106+
107+
status = dumpfile->Read(4, &slice, scratch8);
108+
if (!status.ok() || slice.size() != 4) {
109+
std::cerr << "Unable to read value size: "
110+
<< (status.ok() ? "insufficient data" : status.ToString())
111+
<< std::endl;
112+
exit(1);
113+
}
114+
valsize = rocksdb::DecodeFixed32(slice.data());
115+
if (valsize > last_valsize) {
116+
while (valsize > last_valsize) last_valsize *= 2;
117+
valscratch = std::unique_ptr<char[]>(new char[last_valsize]);
118+
}
119+
120+
status = dumpfile->Read(valsize, &valslice, valscratch.get());
121+
if (!status.ok() || valslice.size() != valsize) {
122+
std::cerr << "Unable to read value: "
123+
<< (status.ok() ? "insufficient data" : status.ToString())
124+
<< std::endl;
125+
exit(1);
126+
}
127+
128+
status = db->Put(rocksdb::WriteOptions(), keyslice, valslice);
129+
if (!status.ok()) {
130+
fprintf(stderr, "Unable to write database entry\n");
131+
exit(1);
132+
}
133+
}
134+
135+
return 0;
136+
}

tools/rocksdb_dump_test.sh

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Round-trip test for rocksdb_undump / rocksdb_dump: restore the checked-in
# sample dump into a scratch database, dump it again, and require the new dump
# to be byte-identical to the sample.

# Stop at the first failing step so the failure is attributed to the tool that
# actually broke rather than to the final cmp.
set -e

TESTDIR=$(mktemp -d /tmp/rocksdb-dump-test.XXXXX)
# Remove the scratch directory however the script exits.
trap 'rm -rf "$TESTDIR"' EXIT
DUMPFILE="tools/sample-dump.dmp"

# Verify that the sample dump file is undumpable and then redumpable.
./rocksdb_undump "$DUMPFILE" "$TESTDIR/db"
./rocksdb_dump --anonymous "$TESTDIR/db" "$TESTDIR/dump"
cmp "$DUMPFILE" "$TESTDIR/dump"

tools/sample-dump.dmp

100 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)