Skip to content

Commit eac6b6d

Browse files
anand1976facebook-github-bot
authored andcommitted
Ignore async_io ReadOption if FileSystem doesn't support it (facebook#11296)
Summary: In PosixFileSystem, IO uring support is opt-in. If the support is not enabled by the user, then ignore the async_io ReadOption in MultiGet and iteration at the top, rather than follow the async_io codepath and transparently switch to sync IO at the FileSystem layer. Pull Request resolved: facebook#11296 Test Plan: Add new unit tests Reviewed By: akankshamahajan15 Differential Revision: D44045776 Pulled By: anand1976 fbshipit-source-id: a0881bf763ca2fde50b84063d0068bb521edd8b9
1 parent a72d55c commit eac6b6d

11 files changed

+204
-80
lines changed

HISTORY.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
## Unreleased
33
### Behavior changes
44
* Compaction output file cutting logic now considers range tombstone start keys. For example, SST partitioner now may receive ParitionRequest for range tombstone start keys.
5+
* If the async_io ReadOption is specified for MultiGet or NewIterator on a platform that doesn't support IO uring, the option is ignored and synchronous IO is used.
56

67
### Bug Fixes
78
* Fixed an issue for backward iteration when user defined timestamp is enabled in combination with BlobDB.

db/arena_wrapped_db_iter.cc

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ void ArenaWrappedDBIter::Init(
4747
read_options_ = read_options;
4848
allow_refresh_ = allow_refresh;
4949
memtable_range_tombstone_iter_ = nullptr;
50+
if (!env->GetFileSystem()->use_async_io()) {
51+
read_options_.async_io = false;
52+
}
5053
}
5154

5255
Status ArenaWrappedDBIter::Refresh() {

db/db_basic_test.cc

+16-13
Original file line numberDiff line numberDiff line change
@@ -2302,9 +2302,7 @@ TEST_P(DBMultiGetAsyncIOTest, GetFromL0) {
23022302
ASSERT_EQ(multiget_io_batch_size.count, 3);
23032303
}
23042304
#else // ROCKSDB_IOURING_PRESENT
2305-
if (GetParam()) {
2306-
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
2307-
}
2305+
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
23082306
#endif // ROCKSDB_IOURING_PRESENT
23092307
}
23102308

@@ -2338,16 +2336,18 @@ TEST_P(DBMultiGetAsyncIOTest, GetFromL1) {
23382336
ASSERT_EQ(values[1], "val_l1_" + std::to_string(54));
23392337
ASSERT_EQ(values[2], "val_l1_" + std::to_string(102));
23402338

2341-
#ifdef ROCKSDB_IOURING_PRESENT
23422339
HistogramData multiget_io_batch_size;
23432340

23442341
statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
23452342

2343+
#ifdef ROCKSDB_IOURING_PRESENT
23462344
// A batch of 3 async IOs is expected, one for each overlapping file in L1
23472345
ASSERT_EQ(multiget_io_batch_size.count, 1);
23482346
ASSERT_EQ(multiget_io_batch_size.max, 3);
2349-
#endif // ROCKSDB_IOURING_PRESENT
23502347
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
2348+
#else // ROCKSDB_IOURING_PRESENT
2349+
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
2350+
#endif // ROCKSDB_IOURING_PRESENT
23512351
}
23522352

23532353
#ifdef ROCKSDB_IOURING_PRESENT
@@ -2531,8 +2531,12 @@ TEST_P(DBMultiGetAsyncIOTest, GetFromL2WithRangeOverlapL0L1) {
25312531
ASSERT_EQ(values[0], "val_l2_" + std::to_string(19));
25322532
ASSERT_EQ(values[1], "val_l2_" + std::to_string(26));
25332533

2534+
#ifdef ROCKSDB_IOURING_PRESENT
25342535
// Bloom filters in L0/L1 will avoid the coroutine calls in those levels
25352536
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2);
2537+
#else // ROCKSDB_IOURING_PRESENT
2538+
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
2539+
#endif // ROCKSDB_IOURING_PRESENT
25362540
}
25372541

25382542
#ifdef ROCKSDB_IOURING_PRESENT
@@ -2623,18 +2627,17 @@ TEST_P(DBMultiGetAsyncIOTest, GetNoIOUring) {
26232627
dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
26242628
keys.data(), values.data(), statuses.data());
26252629
ASSERT_EQ(values.size(), 3);
2626-
ASSERT_EQ(statuses[0], Status::NotSupported());
2627-
ASSERT_EQ(statuses[1], Status::NotSupported());
2628-
ASSERT_EQ(statuses[2], Status::NotSupported());
2630+
ASSERT_EQ(statuses[0], Status::OK());
2631+
ASSERT_EQ(statuses[1], Status::OK());
2632+
ASSERT_EQ(statuses[2], Status::OK());
26292633

2630-
HistogramData multiget_io_batch_size;
2634+
HistogramData async_read_bytes;
26312635

2632-
statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
2636+
statistics()->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
26332637

26342638
// A batch of 3 async IOs is expected, one for each overlapping file in L1
2635-
ASSERT_EQ(multiget_io_batch_size.count, 1);
2636-
ASSERT_EQ(multiget_io_batch_size.max, 3);
2637-
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
2639+
ASSERT_EQ(async_read_bytes.count, 0);
2640+
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
26382641
}
26392642

26402643
INSTANTIATE_TEST_CASE_P(DBMultiGetAsyncIOTest, DBMultiGetAsyncIOTest,

db/forward_iterator.cc

+3
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,9 @@ ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
238238
if (sv_) {
239239
RebuildIterators(false);
240240
}
241+
if (!cfd_->ioptions()->env->GetFileSystem()->use_async_io()) {
242+
read_options_.async_io = false;
243+
}
241244

242245
// immutable_status_ is a local aggregation of the
243246
// status of the immutable Iterators.

db/forward_iterator.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ class ForwardIterator : public InternalIterator {
122122
void DeleteIterator(InternalIterator* iter, bool is_arena = false);
123123

124124
DBImpl* const db_;
125-
const ReadOptions read_options_;
125+
ReadOptions read_options_;
126126
ColumnFamilyData* const cfd_;
127127
const SliceTransform* const prefix_extractor_;
128128
const Comparator* user_comparator_;

db/version_set.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -2121,7 +2121,8 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
21212121
max_file_size_for_l0_meta_pin_(
21222122
MaxFileSizeForL0MetaPin(mutable_cf_options_)),
21232123
version_number_(version_number),
2124-
io_tracer_(io_tracer) {}
2124+
io_tracer_(io_tracer),
2125+
use_async_io_(env_->GetFileSystem()->use_async_io()) {}
21252126

21262127
Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key,
21272128
const Slice& blob_index_slice,
@@ -2505,7 +2506,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
25052506
MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
25062507
#if USE_COROUTINES
25072508
if (read_options.async_io && read_options.optimize_multiget_for_io &&
2508-
using_coroutines()) {
2509+
using_coroutines() && use_async_io_) {
25092510
s = MultiGetAsync(read_options, range, &blob_ctxs);
25102511
} else
25112512
#endif // USE_COROUTINES
@@ -2531,7 +2532,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
25312532
// Avoid using the coroutine version if we're looking in a L0 file, since
25322533
// L0 files won't be parallelized anyway. The regular synchronous version
25332534
// is faster.
2534-
if (!read_options.async_io || !using_coroutines() ||
2535+
if (!read_options.async_io || !using_coroutines() || !use_async_io_ ||
25352536
fp.GetHitFileLevel() == 0 || !fp.RemainingOverlapInLevel()) {
25362537
if (f) {
25372538
bool skip_filters =

db/version_set.h

+1
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,7 @@ class Version {
10751075
// used for debugging and logging purposes only.
10761076
uint64_t version_number_;
10771077
std::shared_ptr<IOTracer> io_tracer_;
1078+
bool use_async_io_;
10781079

10791080
Version(ColumnFamilyData* cfd, VersionSet* vset, const FileOptions& file_opt,
10801081
MutableCFOptions mutable_cf_options,

env/fs_posix.cc

+8
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,14 @@ class PosixFileSystem : public FileSystem {
11831183
#endif
11841184
}
11851185

1186+
bool use_async_io() override {
1187+
#if defined(ROCKSDB_IOURING_PRESENT)
1188+
return IsIOUringEnabled();
1189+
#else
1190+
return false;
1191+
#endif
1192+
}
1193+
11861194
#if defined(ROCKSDB_IOURING_PRESENT)
11871195
// io_uring instance
11881196
std::unique_ptr<ThreadLocalPtr> thread_local_io_urings_;

0 commit comments

Comments
 (0)