Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for data block hash index for column families with user-defined timestamps #13283

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9be1360
Fix internal key access for data block hash index build and lookup in…
brennan913 Dec 3, 2024
98aad0d
Add new API to enable data block hash index in the presence of user-d…
brennan913 Dec 4, 2024
0cc501b
Remove TODO for data block hash index timestamp support
brennan913 Dec 4, 2024
b13a523
Merge branch 'facebook:main' into main
brennan913 Dec 4, 2024
bfeb664
Merge branch 'facebook:main' into main
brennan913 Dec 5, 2024
60de242
Merge branch 'facebook:main' into main
brennan913 Dec 20, 2024
bec564c
Merge branch 'facebook:main' into main
brennan913 Dec 21, 2024
bbfefc7
Merge branch 'facebook:main' into main
brennan913 Dec 26, 2024
4161403
Merge branch 'facebook:main' into main
brennan913 Jan 1, 2025
6e3e0b0
Merge branch 'facebook:main' into main
brennan913 Jan 4, 2025
dd1d8fd
Add draft of timestamp hash index test
brennan913 Jan 6, 2025
d1f2598
Merge branch 'facebook:main' into main
brennan913 Jan 7, 2025
73384ec
Merge branch 'main' of github.com:brennan913/rocksdb
brennan913 Jan 7, 2025
c836637
Add comment documenting KeysAreBytewiseComparableOtherThanTimestamp()
brennan913 Jan 7, 2025
b215655
Move timestamp hash test from hash index test to timestamp basic test
brennan913 Jan 7, 2025
2f98db8
Rework unit test
brennan913 Jan 8, 2025
6aa5c19
Clean up testing and add comments
brennan913 Jan 8, 2025
b631824
Merge branch 'facebook:main' into main
brennan913 Jan 9, 2025
dd14829
Add proposed public API change to unreleased history
brennan913 Jan 9, 2025
9470d78
Merge branch 'main' of github.com:brennan913/rocksdb
brennan913 Jan 9, 2025
c1c7c42
Formatting
brennan913 Jan 9, 2025
46c3bde
Typo
brennan913 Jan 9, 2025
d55e089
Update comment
brennan913 Jan 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions db/db_with_timestamp_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4795,6 +4795,74 @@ TEST_F(DBBasicTestWithTimestamp, TimestampFilterTableReadOnGet) {
Close();
}

class DBBasicTestWithTimestampDataBlockHashIndex
: public DBBasicTestWithTimestampBase {
public:
DBBasicTestWithTimestampDataBlockHashIndex()
: DBBasicTestWithTimestampBase("data_block_hash_index_timestamp_test") {}
protected:
class TestComparator : public DBBasicTestWithTimestampBase::TestComparator {
public:
explicit TestComparator(size_t ts_sz)
: DBBasicTestWithTimestampBase::TestComparator(ts_sz) {}

// Override this method to allow data block hash index to be used
bool KeysAreBytewiseComparableOtherThanTimestamp() const override {
return true;
}
};
};

TEST_F(DBBasicTestWithTimestampDataBlockHashIndex, HashIndexWithTimestamp) {

Options options = CurrentOptions();

// Use a bytewise comparable, timestamp-aware comparator that overrides
// KeysAreBytewiseComparableOtherThanTimestamp() to return true,
// enabling use of data block hash index
const size_t kTimestampSize = Timestamp(0, 0).size();
DBBasicTestWithTimestampDataBlockHashIndex::TestComparator
comparator(kTimestampSize);
options.comparator = &comparator;

// Select data block hash index as this CF's data block index type
BlockBasedTableOptions table_options;
table_options.data_block_index_type =
BlockBasedTableOptions::kDataBlockBinaryAndHash;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));


// Run sequence of $count Put()s and one Get() multiple times
// Original incompatibility between UDTs and Data Block Hash Index
// led to inconsistent behavior that would _usually_ cause this test to fail,
// so multiple iterations significantly reduces misleading passes.
// This test always passes when UDT and Data Block Hash Index are compatible.
for (uint32_t iteration = 0; iteration < 5; iteration++) {
DestroyAndReopen(options);

WriteOptions wopts;

const uint32_t count = 124;
for (uint32_t i = 0; i < count; i++) {
ASSERT_OK(db_->Put(wopts, "key", Timestamp(i, 0), std::to_string(i)));
}

ASSERT_OK(Flush());

ReadOptions ropts;
Slice ts = Timestamp(count, 0);
ropts.timestamp = &ts;

std::string value;
std::string expected_value = std::to_string(count - 1);

ASSERT_OK(db_->Get(ropts, "key", &value));

// When UDTs and hash index are incompatible, these values can mismatch
ASSERT_EQ(expected_value, value);
}
}

} // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
Expand Down
10 changes: 10 additions & 0 deletions include/rocksdb/comparator.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ class Comparator : public Customizable, public CompareInterface {
// with the customized comparator.
virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; }

// return true if the user portion of the key is bytwise comparable other than
// any user defined timestamp.
// i.e. if the output of ExtractUserKeyAndStripTimestamp() on two keys results
// in distinct byte sequences, then this comparator considers them unequal.
// This is used to determine if DataBlockHashIndex is compatible
// with customized comparators that support user-defined timestamps.
virtual bool KeysAreBytewiseComparableOtherThanTimestamp() const {
return false;
}

// if it is a wrapped comparator, may return the root one.
// return itself it is not wrapped.
virtual const Comparator* GetRootComparator() const { return this; }
Expand Down
2 changes: 1 addition & 1 deletion table/block_based/block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ void MetaBlockIter::SeekImpl(const Slice& target) {
// with a smaller [ type | seqno ] (i.e. a larger seqno, or the same seqno
// but larger type).
bool DataBlockIter::SeekForGetImpl(const Slice& target) {
Slice target_user_key = ExtractUserKey(target);
Slice target_user_key = ExtractUserKeyAndStripTimestamp(target, ts_sz_);
uint32_t map_offset = restarts_ + num_restarts_ * sizeof(uint32_t);
uint8_t entry =
data_block_hash_index_->Lookup(data_, map_offset, target_user_key);
Expand Down
6 changes: 4 additions & 2 deletions table/block_based/block_based_table_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,10 @@ struct BlockBasedTableBuilder::Rep {
data_block(table_options.block_restart_interval,
table_options.use_delta_encoding,
false /* use_value_delta_encoding */,
tbo.internal_comparator.user_comparator()
->CanKeysWithDifferentByteContentsBeEqual()
(tbo.internal_comparator.user_comparator()
->CanKeysWithDifferentByteContentsBeEqual() &&
!tbo.internal_comparator.user_comparator()
->KeysAreBytewiseComparableOtherThanTimestamp())
? BlockBasedTableOptions::kDataBlockBinarySearch
: table_options.data_block_index_type,
table_options.data_block_hash_table_util_ratio, ts_sz,
Expand Down
4 changes: 2 additions & 2 deletions table/block_based/block_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ BlockBuilder::BlockBuilder(
use_delta_encoding_(use_delta_encoding),
use_value_delta_encoding_(use_value_delta_encoding),
strip_ts_sz_(persist_user_defined_timestamps ? 0 : ts_sz),
ts_sz_(ts_sz),
is_user_key_(is_user_key),
restarts_(1, 0), // First restart point is at offset 0
counter_(0),
Expand Down Expand Up @@ -236,13 +237,12 @@ inline void BlockBuilder::AddWithLastKeyImpl(const Slice& key,
buffer_.append(value.data(), value.size());
}

// TODO(yuzhangyu): make user defined timestamp work with block hash index.
if (data_block_hash_index_builder_.Valid()) {
// Only data blocks should be using `kDataBlockBinaryAndHash` index type.
// And data blocks should always be built with internal keys instead of
// user keys.
assert(!is_user_key_);
data_block_hash_index_builder_.Add(ExtractUserKey(key),
data_block_hash_index_builder_.Add(ExtractUserKeyAndStripTimestamp(key, ts_sz_),
restarts_.size() - 1);
}

Expand Down
1 change: 1 addition & 0 deletions table/block_based/block_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ class BlockBuilder {
// This is non-zero if there is user-defined timestamp in the user key and it
// should not be persisted.
const size_t strip_ts_sz_;
const size_t ts_sz_;
// Whether the keys provided to build this block are user keys. If not,
// the keys are internal keys. This will affect how timestamp stripping is
// done for the key if `persisted_user_defined_timestamps_` is false and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Add `Comparator::KeysAreBytewiseComparableOtherThanTimestamp` to allow column
families that use a timestamp-aware comparator with otherwise
bytewise-comparable keys to enable
`BlockBasedTableOptions::kDataBlockBinaryAndHash`