Skip to content

Commit 324453e

Browse files
cz2hfacebook-github-bot
authored andcommitted
Fix rowcache get returning incorrect timestamp (facebook#11952)
Summary: Fixes facebook#7930. When there is a timestamp associated with stored records, get from row cache will return the timestamp provided in query instead of the timestamp associated with the stored record. ## Cause of error: Currently a row_handle is fetched using row_cache_key(contains a timestamp provided by user query) and the row_handle itself does not persist timestamp associated with the object. Hence the [GetContext::SaveValue() ](https://github.com/facebook/rocksdb/blob/6e3429b8a6a53d5e477074057b5f27218063b5f2/table/get_context.cc#L257) function will fetch the timestamp in row_cache_key and may return the incorrect timestamp value. ## Proposed Solution If current cf enables ts, append a timestamp associated with stored records after the value in replay_log (equivalently the value of row cache entry). When read, `replayGetContextLog()` will update parsed_key with the correct timestamp. Pull Request resolved: facebook#11952 Reviewed By: ajkr Differential Revision: D51501176 Pulled By: jowlyzhang fbshipit-source-id: 808fc943a8ae95de56ae0e82ec59a2573a031f28
1 parent ddb7df1 commit 324453e

File tree

3 files changed

+224
-80
lines changed

3 files changed

+224
-80
lines changed

db/db_with_timestamp_basic_test.cc

Lines changed: 151 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,84 +1646,198 @@ TEST_F(DBBasicTestWithTimestamp, GetWithRowCache) {
16461646

16471647
const Snapshot* snap_with_nothing = db_->GetSnapshot();
16481648
ASSERT_OK(db_->Put(write_opts, "foo", ts_early, "bar"));
1649-
const Snapshot* snap_with_foo = db_->GetSnapshot();
1649+
ASSERT_OK(db_->Put(write_opts, "foo2", ts_early, "bar2"));
1650+
ASSERT_OK(db_->Put(write_opts, "foo3", ts_early, "bar3"));
16501651

1651-
// Ensure file has sequence number greater than snapshot_with_foo
1652-
for (int i = 0; i < 10; i++) {
1653-
std::string numStr = std::to_string(i);
1654-
ASSERT_OK(db_->Put(write_opts, numStr, ts_later, numStr));
1655-
}
1652+
const Snapshot* snap_with_foo = db_->GetSnapshot();
16561653
ASSERT_OK(Flush());
1657-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
1658-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
16591654

16601655
ReadOptions read_opts;
16611656
read_opts.timestamp = &ts_later_slice;
16621657

16631658
std::string read_value;
16641659
std::string read_ts;
1665-
Status s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1666-
ASSERT_OK(s);
1667-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
1668-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
1669-
ASSERT_EQ(read_ts, ts_early);
1660+
Status s;
16701661

1671-
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1672-
ASSERT_OK(s);
1673-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
1674-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
1675-
// Row cache is not storing the ts when record is inserted/updated.
1676-
// To be fixed after enabling ROW_CACHE with timestamp.
1677-
// ASSERT_EQ(read_ts, ts_early);
1662+
int expected_hit_count = 0;
1663+
int expected_miss_count = 0;
1664+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1665+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count);
1666+
1667+
{
1668+
read_opts.timestamp = nullptr;
1669+
s = db_->Get(read_opts, "foo", &read_value);
1670+
ASSERT_NOK(s);
1671+
ASSERT_TRUE(s.IsInvalidArgument());
1672+
}
1673+
1674+
// Mix use of Get
1675+
{
1676+
read_opts.timestamp = &ts_later_slice;
1677+
1678+
// Use Get without ts first, expect cache entry to store the correct ts
1679+
s = db_->Get(read_opts, "foo2", &read_value);
1680+
ASSERT_OK(s);
1681+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1682+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1683+
++expected_miss_count);
1684+
ASSERT_EQ(read_value, "bar2");
1685+
1686+
s = db_->Get(read_opts, "foo2", &read_value, &read_ts);
1687+
ASSERT_OK(s);
1688+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), ++expected_hit_count);
1689+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count);
1690+
ASSERT_EQ(read_ts, ts_early);
1691+
ASSERT_EQ(read_value, "bar2");
1692+
1693+
// Use Get with ts first, expect the Get without ts can get correct record
1694+
s = db_->Get(read_opts, "foo3", &read_value, &read_ts);
1695+
ASSERT_OK(s);
1696+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1697+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1698+
++expected_miss_count);
1699+
ASSERT_EQ(read_ts, ts_early);
1700+
ASSERT_EQ(read_value, "bar3");
1701+
1702+
s = db_->Get(read_opts, "foo3", &read_value);
1703+
ASSERT_OK(s);
1704+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), ++expected_hit_count);
1705+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count);
1706+
ASSERT_EQ(read_value, "bar3");
1707+
}
1708+
1709+
{
1710+
// Test with consecutive calls of Get with ts.
1711+
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1712+
ASSERT_OK(s);
1713+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1714+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1715+
++expected_miss_count);
1716+
ASSERT_EQ(read_ts, ts_early);
1717+
ASSERT_EQ(read_value, "bar");
1718+
1719+
// Test repeated get on cache entry
1720+
for (int i = 0; i < 3; i++) {
1721+
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1722+
ASSERT_OK(s);
1723+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT),
1724+
++expected_hit_count);
1725+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1726+
expected_miss_count);
1727+
ASSERT_EQ(read_ts, ts_early);
1728+
ASSERT_EQ(read_value, "bar");
1729+
}
1730+
}
16781731

16791732
{
16801733
std::string ts_nothing = Timestamp(0, 0);
16811734
Slice ts_nothing_slice = ts_nothing;
16821735
read_opts.timestamp = &ts_nothing_slice;
16831736
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
16841737
ASSERT_TRUE(s.IsNotFound());
1685-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
1686-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
1687-
1688-
read_opts.timestamp = &ts_later_slice;
1689-
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1690-
ASSERT_OK(s);
1691-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2);
1692-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
1738+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1739+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1740+
++expected_miss_count);
16931741
}
16941742

16951743
{
16961744
read_opts.snapshot = snap_with_foo;
1697-
1745+
read_opts.timestamp = &ts_later_slice;
16981746
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
16991747
ASSERT_OK(s);
1700-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2);
1701-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 3);
1748+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1749+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1750+
++expected_miss_count);
1751+
ASSERT_EQ(read_ts, ts_early);
1752+
ASSERT_EQ(read_value, "bar");
17021753

17031754
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
17041755
ASSERT_OK(s);
1705-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3);
1706-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 3);
1756+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), ++expected_hit_count);
1757+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count);
1758+
ASSERT_EQ(read_ts, ts_early);
1759+
ASSERT_EQ(read_value, "bar");
17071760
}
17081761

17091762
{
17101763
read_opts.snapshot = snap_with_nothing;
17111764
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
17121765
ASSERT_TRUE(s.IsNotFound());
1713-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3);
1714-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 4);
1766+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1767+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1768+
++expected_miss_count);
17151769

17161770
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
17171771
ASSERT_TRUE(s.IsNotFound());
1718-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3);
1719-
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 5);
1772+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count);
1773+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS),
1774+
++expected_miss_count);
17201775
}
17211776

17221777
db_->ReleaseSnapshot(snap_with_nothing);
17231778
db_->ReleaseSnapshot(snap_with_foo);
17241779
Close();
17251780
}
17261781

1782+
TEST_F(DBBasicTestWithTimestamp, GetWithRowCacheMultiSST) {
1783+
BlockBasedTableOptions table_options;
1784+
table_options.block_size = 1;
1785+
Options options = CurrentOptions();
1786+
options.env = env_;
1787+
options.create_if_missing = true;
1788+
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1789+
LRUCacheOptions cache_options;
1790+
cache_options.capacity = 8192;
1791+
options.row_cache = cache_options.MakeSharedRowCache();
1792+
1793+
const size_t kTimestampSize = Timestamp(0, 0).size();
1794+
TestComparator test_cmp(kTimestampSize);
1795+
options.comparator = &test_cmp;
1796+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1797+
options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1798+
options.disable_auto_compactions = true;
1799+
1800+
DestroyAndReopen(options);
1801+
1802+
std::string ts_early = Timestamp(1, 0);
1803+
std::string ts_later = Timestamp(10, 0);
1804+
Slice ts_later_slice = ts_later;
1805+
1806+
ASSERT_OK(db_->Put(WriteOptions(), "foo", ts_early, "v1"));
1807+
ASSERT_OK(Flush());
1808+
1809+
ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily();
1810+
ASSERT_OK(
1811+
db_->Merge(WriteOptions(), default_cf, "foo", Timestamp(2, 0), "v2"));
1812+
ASSERT_OK(
1813+
db_->Merge(WriteOptions(), default_cf, "foo", Timestamp(3, 0), "v3"));
1814+
ASSERT_OK(Flush());
1815+
1816+
ReadOptions read_opts;
1817+
read_opts.timestamp = &ts_later_slice;
1818+
1819+
std::string read_value;
1820+
std::string read_ts;
1821+
Status s;
1822+
1823+
{
1824+
// Since there are two SST files, will trigger the table lookup twice.
1825+
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1826+
ASSERT_OK(s);
1827+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
1828+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
1829+
ASSERT_EQ(read_ts, Timestamp(3, 0));
1830+
ASSERT_EQ(read_value, "v1,v2,v3");
1831+
1832+
s = db_->Get(read_opts, "foo", &read_value, &read_ts);
1833+
ASSERT_OK(s);
1834+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2);
1835+
ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2);
1836+
ASSERT_EQ(read_ts, Timestamp(3, 0));
1837+
ASSERT_EQ(read_value, "v1,v2,v3");
1838+
}
1839+
}
1840+
17271841
TEST_P(DBBasicTestWithTimestampTableOptions, MultiGetPrefixFilter) {
17281842
Options options = CurrentOptions();
17291843
options.env = env_;

0 commit comments

Comments
 (0)