Skip to content

Commit f6c1489

Browse files
cbi42 facebook-github-bot
authored and committed
Add a new TransactionDBOptions txn_commit_bypass_memtable_threshold (#13304)
Summary: ... to make it easier to use the new transaction feature `commit_bypass_memtable`. Instead of needing to specify the option when creating a transaction, this option allows users to specify a threshold on the number of updates in a transaction to determine when to skip memtable writes for a transaction. Pull Request resolved: #13304 Test Plan: a new unit test for the new option Reviewed By: pdillinger Differential Revision: D68288579 Pulled By: cbi42 fbshipit-source-id: d3076629891d8b1d427878d20f0ac40dc0dadd35
1 parent 5405835 commit f6c1489

File tree

5 files changed

+90
-13
lines changed

5 files changed

+90
-13
lines changed

include/rocksdb/utilities/transaction.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -750,8 +750,6 @@ class Transaction {
750750

751751
virtual TxnTimestamp GetCommitTimestamp() const { return kMaxTxnTimestamp; }
752752

753-
virtual bool GetCommitBypassMemTable() const { return false; }
754-
755753
protected:
756754
explicit Transaction(const TransactionDB* /*db*/) {}
757755
Transaction() : log_number_(0), txn_state_(STARTED) {}

include/rocksdb/utilities/transaction_db.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,16 @@ struct TransactionDBOptions {
247247
// for more details.
248248
std::vector<std::shared_ptr<SecondaryIndex>> secondary_indices;
249249

250+
// EXPERIMENTAL, SUBJECT TO CHANGE
251+
// This option is only valid for write committed. If the number of updates in
252+
// a transaction exceeds this threshold, then the transaction commit will skip
253+
// insertions into memtable as an optimization to reduce commit latency.
254+
// See comment for TransactionOptions::commit_bypass_memtable for more detail.
255+
// Setting TransactionOptions::commit_bypass_memtable to true takes precedence
256+
// over this option.
257+
uint32_t txn_commit_bypass_memtable_threshold =
258+
std::numeric_limits<uint32_t>::max();
259+
250260
private:
251261
// 128 entries
252262
// Should the default value change, please also update wp_snapshot_cache_bits
@@ -347,7 +357,7 @@ struct TransactionOptions {
347357
// DeleteRange, SingleDelete.
348358
bool write_batch_track_timestamp_size = false;
349359

350-
// EXPERIMENTAL
360+
// EXPERIMENTAL, SUBJECT TO CHANGE
351361
// Only supports write-committed policy. If set to true, the transaction will
352362
// skip memtable write and ingest into the DB directly during Commit(). This
353363
// makes Commit() much faster for transactions with many operations.

utilities/transactions/pessimistic_transaction.cc

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,14 @@ void PessimisticTransaction::Initialize(const TransactionOptions& txn_options) {
104104
read_timestamp_ = kMaxTxnTimestamp;
105105
commit_timestamp_ = kMaxTxnTimestamp;
106106

107-
commit_bypass_memtable_ = txn_options.commit_bypass_memtable;
108-
write_batch_.SetTrackPerCFStat(txn_options.commit_bypass_memtable);
107+
if (txn_options.commit_bypass_memtable) {
108+
commit_bypass_memtable_threshold_ = 0;
109+
} else {
110+
commit_bypass_memtable_threshold_ =
111+
db_options.txn_commit_bypass_memtable_threshold;
112+
}
113+
write_batch_.SetTrackPerCFStat(commit_bypass_memtable_threshold_ <
114+
std::numeric_limits<uint32_t>::max());
109115
}
110116

111117
PessimisticTransaction::~PessimisticTransaction() {
@@ -846,7 +852,8 @@ Status WriteCommittedTxn::CommitInternal() {
846852
if (!needs_ts) {
847853
s = WriteBatchInternal::MarkCommit(working_batch, name_);
848854
} else {
849-
assert(!commit_bypass_memtable_);
855+
assert(commit_bypass_memtable_threshold_ ==
856+
std::numeric_limits<uint32_t>::max());
850857
assert(commit_timestamp_ != kMaxTxnTimestamp);
851858
char commit_ts_buf[sizeof(kMaxTxnTimestamp)];
852859
EncodeFixed64(commit_ts_buf, commit_timestamp_);
@@ -882,7 +889,7 @@ Status WriteCommittedTxn::CommitInternal() {
882889
// any operations appended to this working_batch will be ignored from WAL
883890
working_batch->MarkWalTerminationPoint();
884891

885-
const bool bypass_memtable = commit_bypass_memtable_ && wb->Count() > 0;
892+
bool bypass_memtable = wb->Count() > commit_bypass_memtable_threshold_;
886893
if (!bypass_memtable) {
887894
// insert prepared batch into Memtable only skipping WAL.
888895
// Memtable will ignore BeginPrepare/EndPrepare markers
@@ -904,6 +911,8 @@ Status WriteCommittedTxn::CommitInternal() {
904911
}
905912
}
906913
assert(log_number_ > 0);
914+
TEST_SYNC_POINT_CALLBACK("WriteCommittedTxn::CommitInternal:bypass_memtable",
915+
static_cast<void*>(&bypass_memtable));
907916
if (bypass_memtable) {
908917
s = db_impl_->WriteImpl(
909918
write_options_, working_batch, /*callback*/ nullptr,

utilities/transactions/pessimistic_transaction.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ class PessimisticTransaction : public TransactionBaseImpl {
168168
bool skip_prepare_ = false;
169169
// Refer to
170170
// TransactionOptions::commit_bypass_memtable and TransactionDBOptions::txn_commit_bypass_memtable_threshold
171-
bool commit_bypass_memtable_ = false;
171+
uint32_t commit_bypass_memtable_threshold_ =
172+
std::numeric_limits<uint32_t>::max();
172173

173174
private:
174175
friend class TransactionTest_ValidateSnapshotTest_Test;
@@ -307,10 +308,6 @@ class WriteCommittedTxn : public PessimisticTransaction {
307308
Status SetCommitTimestamp(TxnTimestamp ts) override;
308309
TxnTimestamp GetCommitTimestamp() const override { return commit_timestamp_; }
309310

310-
bool GetCommitBypassMemTable() const override {
311-
return commit_bypass_memtable_;
312-
}
313-
314311
private:
315312
template <typename TValue>
316313
Status GetForUpdateImpl(const ReadOptions& read_options,

utilities/transactions/transaction_test.cc

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8830,7 +8830,8 @@ class CommitBypassMemtableTest : public DBTestBase,
88308830
Options options;
88318831
TransactionDBOptions txn_db_opts;
88328832

8833-
void SetUpTransactionDB() {
8833+
void SetUpTransactionDB(
8834+
uint32_t threshold = std::numeric_limits<uint32_t>::max()) {
88348835
options = CurrentOptions();
88358836
options.create_if_missing = true;
88368837
options.allow_2pc = true;
@@ -8842,6 +8843,7 @@ class CommitBypassMemtableTest : public DBTestBase,
88428843
Destroy(options, true);
88438844

88448845
txn_db_opts.write_policy = TxnDBWritePolicy::WRITE_COMMITTED;
8846+
txn_db_opts.txn_commit_bypass_memtable_threshold = threshold;
88458847
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db));
88468848
ASSERT_NE(txn_db, nullptr);
88478849
db_ = txn_db;
@@ -9297,6 +9299,67 @@ TEST_P(CommitBypassMemtableTest, Recovery) {
92979299
VerifyDBFromMap(expected);
92989300
}
92999301

9302+
TEST_P(CommitBypassMemtableTest, ThresholdTxnDBOption) {
9303+
// Tests TransactionDBOptions::txn_commit_bypass_memtable_threshold
9304+
const uint32_t threshold = 10;
9305+
SetUpTransactionDB(/*threshold=*/threshold);
9306+
bool commit_bypass_memtable = false;
9307+
// TODO: add and use stats for this
9308+
SyncPoint::GetInstance()->SetCallBack(
9309+
"WriteCommittedTxn::CommitInternal:bypass_memtable",
9310+
[&](void* arg) { commit_bypass_memtable = *(static_cast<bool*>(arg)); });
9311+
SyncPoint::GetInstance()->EnableProcessing();
9312+
9313+
// TransactionOptions::commit_bypass_memtable takes precedence
9314+
WriteOptions wopts;
9315+
TransactionOptions txn_opts;
9316+
txn_opts.commit_bypass_memtable = true;
9317+
Transaction* txn1 = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
9318+
ASSERT_OK(txn1->SetName("xid1"));
9319+
ASSERT_OK(txn1->Put("k2", "v2"));
9320+
ASSERT_OK(txn1->Put("k1", "v1"));
9321+
ASSERT_OK(txn1->Prepare());
9322+
ASSERT_OK(txn1->Commit());
9323+
ASSERT_TRUE(commit_bypass_memtable);
9324+
9325+
// At threshold (commit must not bypass memtable) and one above it (must bypass)
9326+
for (auto num_ops : {threshold, threshold + 1}) {
9327+
commit_bypass_memtable = false;
9328+
txn_opts.commit_bypass_memtable = false;
9329+
auto txn = txn_db->BeginTransaction(wopts, txn_opts, txn1);
9330+
txn1 = nullptr;
9331+
ASSERT_OK(txn->SetName("xid" + std::to_string(num_ops)));
9332+
for (uint32_t i = 0; i < num_ops; ++i) {
9333+
ASSERT_OK(
9334+
txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
9335+
}
9336+
ASSERT_OK(txn->Prepare());
9337+
ASSERT_OK(txn->Commit());
9338+
ASSERT_EQ(commit_bypass_memtable, num_ops > threshold);
9339+
delete txn;
9340+
}
9341+
9342+
// Repeat the same test with updates to two CFs
9343+
std::vector<std::string> cfs = {"pk", "sk"};
9344+
CreateColumnFamilies(cfs, options);
9345+
9346+
// At threshold (commit must not bypass memtable) and one above it (must bypass)
9347+
for (auto num_ops : {threshold, threshold + 1}) {
9348+
commit_bypass_memtable = false;
9349+
txn_opts.commit_bypass_memtable = false;
9350+
auto txn_cf = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
9351+
ASSERT_OK(txn_cf->SetName("xid_cf" + std::to_string(num_ops)));
9352+
for (uint32_t i = 0; i < num_ops; ++i) {
9353+
ASSERT_OK(txn_cf->Put(handles_[i % 2], "key" + std::to_string(i),
9354+
"value" + std::to_string(i)));
9355+
}
9356+
ASSERT_OK(txn_cf->Prepare());
9357+
ASSERT_OK(txn_cf->Commit());
9358+
ASSERT_EQ(commit_bypass_memtable, num_ops > threshold);
9359+
delete txn_cf;
9360+
}
9361+
}
9362+
93009363
} // namespace ROCKSDB_NAMESPACE
93019364

93029365
int main(int argc, char** argv) {

0 commit comments

Comments
 (0)