Skip to content

Commit b443d24

Browse files
riversand963 authored and
facebook-github-bot committed
Stop operating on DB in a stress test background thread (#10373)
Summary: Stress test background threads do not coordinate with test worker threads when the DB is reopened in the middle of a test run, so accessing the DB object from a stress test background thread can race with the test workers. Remove TimestampedSnapshotsThread; timestamped snapshot creation and release now happen in the test worker threads' commit paths. Pull Request resolved: #10373 Test Plan: ``` ./db_stress --acquire_snapshot_one_in=0 --adaptive_readahead=0 --allow_concurrent_memtable_write=1 \ --allow_data_in_errors=True --async_io=0 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 \ --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=8 \ --block_size=16384 --bloom_bits=7.580319535285394 --bottommost_compression_type=disable \ --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_size=8388608 --cache_type=lru_cache \ --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=0 --charge_filter_construction=1 \ --charge_table_reader=0 --checkpoint_one_in=0 --checksum_type=kxxHash64 --clear_column_family_one_in=0 \ --compact_files_one_in=1000000 --compact_range_one_in=0 --compaction_pri=1 --compaction_ttl=0 \ --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 \ --compression_type=xpress --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=0 \ --continuous_verification_interval=0 --create_timestamped_snapshot_one_in=20 --data_block_index_type=0 \ --db=/dev/shm/rocksdb/ --db_write_buffer_size=0 --delpercent=5 --delrangepercent=0 --destroy_db_initially=1 \ --detect_filter_construct_corruption=0 --disable_wal=0 --enable_compaction_filter=1 --enable_pipelined_write=0 \ --fail_if_options_file_error=1 --file_checksum_impl=xxh64 --flush_one_in=1000000 --format_version=2 \ --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 \ --get_sorted_wal_files_one_in=0 --index_block_restart_interval=11 --index_type=0 --ingest_external_file_one_in=0 \ --iterpercent=0 --key_len_percent_dist=1,30,69 
--level_compaction_dynamic_level_bytes=True \ --log2_keys_per_lock=10 --long_running_snapshots=0 --mark_for_compaction_one_file_in=10 \ --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=25000000 \ --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=64 \ --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=0 --memtable_prefix_bloom_size_ratio=0.5 \ --memtable_whole_key_filtering=1 --memtablerep=skip_list --mmap_read=0 --mock_direct_io=True \ --nooverwritepercent=1 --open_files=500000 --open_metadata_write_fault_one_in=0 \ --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=20000 \ --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=2 \ --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=1 \ --prefixpercent=5 --prepopulate_block_cache=0 --progress_reports=0 --read_fault_one_in=1000 \ --readpercent=55 --recycle_log_file_num=0 --reopen=100 --ribbon_starting_level=8 \ --secondary_cache_fault_one_in=0 --secondary_cache_uri= --snapshot_hold_ops=100000 \ --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 \ --subcompactions=3 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 \ --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=1 \ --txn_write_policy=0 --unordered_write=0 --unpartitioned_pinning=0 \ --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=1 --use_full_merge_v1=1 \ --use_merge=1 --use_multiget=0 --use_txn=1 --user_timestamp_size=0 --value_size_mult=32 \ --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 \ --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none \ --write_buffer_size=4194304 --write_dbid_to_manifest=0 --writepercent=35 ``` make crash_test_with_txn make crash_test_with_multiops_wc_txn Reviewed By: jay-zhuang Differential Revision: D37903189 
Pulled By: riversand963 fbshipit-source-id: cd1728ad7ba4ce4cf47af23c4f65dda0956744f9
1 parent e576f2a commit b443d24

File tree

5 files changed

+31
-82
lines changed

5 files changed

+31
-82
lines changed

db_stress_tool/db_stress_common.cc

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -148,42 +148,6 @@ void DbVerificationThread(void* v) {
148148
}
149149
}
150150

151-
void TimestampedSnapshotsThread(void* v) {
152-
assert(FLAGS_create_timestamped_snapshot_one_in > 0);
153-
auto* thread = reinterpret_cast<ThreadState*>(v);
154-
assert(thread);
155-
SharedState* shared = thread->shared;
156-
assert(shared);
157-
StressTest* stress_test = shared->GetStressTest();
158-
assert(stress_test);
159-
while (true) {
160-
{
161-
MutexLock l(shared->GetMutex());
162-
if (shared->ShouldStopBgThread()) {
163-
shared->IncBgThreadsFinished();
164-
if (shared->BgThreadsFinished()) {
165-
shared->GetCondVar()->SignalAll();
166-
}
167-
return;
168-
}
169-
}
170-
171-
uint64_t now = db_stress_env->NowNanos();
172-
std::pair<Status, std::shared_ptr<const Snapshot>> res =
173-
stress_test->CreateTimestampedSnapshot(now);
174-
if (res.first.ok()) {
175-
assert(res.second);
176-
assert(res.second->GetTimestamp() == now);
177-
} else {
178-
assert(!res.second);
179-
}
180-
constexpr uint64_t time_diff = static_cast<uint64_t>(1000) * 1000 * 1000;
181-
stress_test->ReleaseOldTimestampedSnapshots(now - time_diff);
182-
183-
db_stress_env->SleepForMicroseconds(1000 * 1000);
184-
}
185-
}
186-
187151
void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz) {
188152
if (!FLAGS_verbose) {
189153
return;

db_stress_tool/db_stress_driver.cc

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,6 @@ bool RunStressTest(StressTest* stress) {
8484
shared.IncBgThreads();
8585
}
8686

87-
if (FLAGS_create_timestamped_snapshot_one_in > 0) {
88-
shared.IncBgThreads();
89-
}
90-
9187
std::vector<ThreadState*> threads(n);
9288
for (uint32_t i = 0; i < n; i++) {
9389
threads[i] = new ThreadState(i, &shared);
@@ -105,12 +101,6 @@ bool RunStressTest(StressTest* stress) {
105101
&continuous_verification_thread);
106102
}
107103

108-
ThreadState timestamped_snapshots_thread(0, &shared);
109-
if (FLAGS_create_timestamped_snapshot_one_in > 0) {
110-
db_stress_env->StartThread(TimestampedSnapshotsThread,
111-
&timestamped_snapshots_thread);
112-
}
113-
114104
// Each thread goes through the following states:
115105
// initializing -> wait for others to init -> read/populate/depopulate
116106
// wait for others to operate -> verify -> done
@@ -179,8 +169,7 @@ bool RunStressTest(StressTest* stress) {
179169
stress->PrintStatistics();
180170

181171
if (FLAGS_compaction_thread_pool_adjust_interval > 0 ||
182-
FLAGS_continuous_verification_interval > 0 ||
183-
FLAGS_create_timestamped_snapshot_one_in > 0) {
172+
FLAGS_continuous_verification_interval > 0) {
184173
MutexLock l(shared.GetMutex());
185174
shared.SetShouldStopBgThread();
186175
while (!shared.BgThreadsFinished()) {

db_stress_tool/db_stress_test_base.cc

Lines changed: 23 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -421,35 +421,6 @@ void StressTest::PrintStatistics() {
421421
}
422422
}
423423

424-
void StressTest::ReleaseOldTimestampedSnapshots(uint64_t ts) {
425-
#ifndef ROCKSDB_LITE
426-
if (!txn_db_) {
427-
return;
428-
}
429-
assert(txn_db_);
430-
txn_db_->ReleaseTimestampedSnapshotsOlderThan(ts);
431-
#else
432-
(void)ts;
433-
fprintf(stderr, "timestamped snapshots not supported in LITE mode\n");
434-
exit(1);
435-
#endif // ROCKSDB_LITE
436-
}
437-
438-
std::pair<Status, std::shared_ptr<const Snapshot>>
439-
StressTest::CreateTimestampedSnapshot(uint64_t ts) {
440-
#ifndef ROCKSDB_LITE
441-
if (!txn_db_) {
442-
return std::make_pair(Status::InvalidArgument(), nullptr);
443-
}
444-
assert(txn_db_);
445-
return txn_db_->CreateTimestampedSnapshot(ts);
446-
#else
447-
(void)ts;
448-
fprintf(stderr, "timestamped snapshots not supported in LITE mode\n");
449-
exit(1);
450-
#endif // ROCKSDB_LITE
451-
}
452-
453424
// Currently PreloadDb has to be single-threaded.
454425
void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys,
455426
SharedState* shared) {
@@ -594,6 +565,7 @@ Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) {
594565
if (!FLAGS_use_txn) {
595566
return Status::InvalidArgument("CommitTxn when FLAGS_use_txn is not set");
596567
}
568+
assert(txn_db_);
597569
Status s = txn->Prepare();
598570
std::shared_ptr<const Snapshot> timestamped_snapshot;
599571
if (s.ok()) {
@@ -602,10 +574,32 @@ Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) {
602574
uint64_t ts = db_stress_env->NowNanos();
603575
s = txn->CommitAndTryCreateSnapshot(/*notifier=*/nullptr, ts,
604576
&timestamped_snapshot);
577+
578+
std::pair<Status, std::shared_ptr<const Snapshot>> res;
579+
if (thread->tid == 0) {
580+
uint64_t now = db_stress_env->NowNanos();
581+
res = txn_db_->CreateTimestampedSnapshot(now);
582+
if (res.first.ok()) {
583+
assert(res.second);
584+
assert(res.second->GetTimestamp() == now);
585+
if (timestamped_snapshot) {
586+
assert(res.second->GetTimestamp() >
587+
timestamped_snapshot->GetTimestamp());
588+
}
589+
} else {
590+
assert(!res.second);
591+
}
592+
}
605593
} else {
606594
s = txn->Commit();
607595
}
608596
}
597+
if (thread && FLAGS_create_timestamped_snapshot_one_in > 0 &&
598+
thread->rand.OneInOpt(50000)) {
599+
uint64_t now = db_stress_env->NowNanos();
600+
constexpr uint64_t time_diff = static_cast<uint64_t>(1000) * 1000 * 1000;
601+
txn_db_->ReleaseTimestampedSnapshotsOlderThan(now - time_diff);
602+
}
609603
delete txn;
610604
return s;
611605
}

db_stress_tool/db_stress_test_base.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -43,11 +43,6 @@ class StressTest {
4343

4444
void PrintStatistics();
4545

46-
void ReleaseOldTimestampedSnapshots(uint64_t ts);
47-
48-
std::pair<Status, std::shared_ptr<const Snapshot>> CreateTimestampedSnapshot(
49-
uint64_t ts);
50-
5146
protected:
5247
Status AssertSame(DB* db, ColumnFamilyHandle* cf,
5348
ThreadState::SnapshotState& snap_state);

db_stress_tool/multi_ops_txns_stress.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1382,6 +1382,13 @@ Status MultiOpsTxnsStressTest::CommitAndCreateTimestampedSnapshotIfNeeded(
13821382
} else {
13831383
s = txn.Commit();
13841384
}
1385+
assert(txn_db_);
1386+
if (FLAGS_create_timestamped_snapshot_one_in > 0 &&
1387+
thread->rand.OneInOpt(50000)) {
1388+
uint64_t now = db_stress_env->NowNanos();
1389+
constexpr uint64_t time_diff = static_cast<uint64_t>(1000) * 1000 * 1000;
1390+
txn_db_->ReleaseTimestampedSnapshotsOlderThan(now - time_diff);
1391+
}
13851392
return s;
13861393
}
13871394

0 commit comments

Comments
 (0)