Skip to content

Commit 87ea83e

Browse files
authored
Fix race between list_versions and delete_snapshot on NFS 8104588520 (#2092)
See arcticc PR #1170 for more context and discussion. The keys carried by the `KeyNotFoundException` did not have `unencode_object_id` applied to them, so during snapshot iteration they were incorrectly treated as different from the snapshot key being iterated over (which had already had the unencoding applied).
1 parent 6371256 commit 87ea83e

File tree

6 files changed

+109
-14
lines changed

6 files changed

+109
-14
lines changed

cpp/arcticdb/storage/s3/detail-inl.hpp

+8-6
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ namespace s3 {
168168
template<class KeyBucketizer>
169169
void do_read_impl(Composite<VariantKey> &&ks,
170170
const ReadVisitor &visitor,
171+
folly::Function<VariantKey(VariantKey&&)> key_decoder,
171172
const std::string &root_folder,
172173
const std::string &bucket_name,
173174
const S3ClientWrapper &s3_client,
@@ -179,7 +180,7 @@ namespace s3 {
179180

180181
(fg::from(ks.as_range()) | fg::move | fg::groupBy(fmt_db)).foreach(
181182
[&s3_client, &bucket_name, &root_folder, b = std::move(bucketizer), &visitor, &keys_not_found,
182-
opts = opts](auto &&group) {
183+
&key_decoder, opts = opts](auto &&group) {
183184

184185
for (auto &k: group.values()) {
185186
auto key_type_dir = key_type_folder(root_folder, variant_key_type(k));
@@ -189,25 +190,26 @@ namespace s3 {
189190
s3_object_name,
190191
bucket_name);
191192

193+
auto unencoded_key = key_decoder(std::move(k));
192194
if (get_object_result.is_success()) {
193195
ARCTICDB_SUBSAMPLE(S3StorageVisitSegment, 0)
194196

195-
visitor(k, std::move(get_object_result.get_output()));
197+
visitor(unencoded_key, std::move(get_object_result.get_output()));
196198

197-
ARCTICDB_DEBUG(log::storage(), "Read key {}: {}", variant_key_type(k),
198-
variant_key_view(k));
199+
ARCTICDB_DEBUG(log::storage(), "Read key {}: {}", variant_key_type(unencoded_key),
200+
variant_key_view(unencoded_key));
199201
} else {
200202
auto &error = get_object_result.get_error();
201203
raise_if_unexpected_error(error, s3_object_name);
202204

203205
log::storage().log(
204206
opts.dont_warn_about_missing_key ? spdlog::level::debug : spdlog::level::warn,
205207
"Failed to find segment for key '{}' {}: {}",
206-
variant_key_view(k),
208+
variant_key_view(unencoded_key),
207209
error.GetExceptionName().c_str(),
208210
error.GetMessage().c_str());
209211

210-
keys_not_found.push_back(k);
212+
keys_not_found.push_back(unencoded_key);
211213
}
212214
}
213215
});

cpp/arcticdb/storage/s3/nfs_backed_storage.cpp

+1-5
Original file line numberDiff line numberDiff line change
@@ -171,15 +171,11 @@ void NfsBackedStorage::do_update(Composite<KeySegmentPair>&& kvs, UpdateOpts) {
171171
}
172172

173173
void NfsBackedStorage::do_read(Composite<VariantKey>&& ks, const ReadVisitor& visitor, ReadKeyOpts opts) {
174-
auto func = [visitor] (const VariantKey& k, Segment&& seg) mutable {
175-
visitor(unencode_object_id(k), std::move(seg));
176-
};
177-
178174
auto enc = ks.transform([] (auto&& key) {
179175
return encode_object_id(key);
180176
});
181177

182-
s3::detail::do_read_impl(std::move(enc), func, root_folder_, bucket_name_, *s3_client_, NfsBucketizer{}, opts);
178+
s3::detail::do_read_impl(std::move(enc), visitor, unencode_object_id, root_folder_, bucket_name_, *s3_client_, NfsBucketizer{}, opts);
183179
}
184180

185181
void NfsBackedStorage::do_remove(Composite<VariantKey>&& ks, RemoveOpts) {

cpp/arcticdb/storage/s3/s3_storage.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ void S3Storage::do_update(Composite<KeySegmentPair>&& kvs, UpdateOpts) {
6262
}
6363

6464
void S3Storage::do_read(Composite<VariantKey>&& ks, const ReadVisitor& visitor, ReadKeyOpts opts) {
65-
detail::do_read_impl(std::move(ks), visitor, root_folder_, bucket_name_, *s3_client_, FlatBucketizer{}, opts);
65+
detail::do_read_impl(std::move(ks), visitor, folly::identity, root_folder_, bucket_name_, *s3_client_, FlatBucketizer{}, opts);
6666
}
6767

6868
void S3Storage::do_remove(Composite<VariantKey>&& ks, RemoveOpts) {

cpp/arcticdb/storage/test/test_s3_storage.cpp

+50-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <arcticdb/storage/s3/s3_api.hpp>
1212
#include <arcticdb/storage/s3/s3_storage.hpp>
1313
#include <arcticdb/storage/s3/s3_mock_client.hpp>
14+
#include <arcticdb/storage/s3/nfs_backed_storage.hpp>
1415
#include <arcticdb/storage/s3/detail-inl.hpp>
1516
#include <arcticdb/entity/protobufs.hpp>
1617
#include <arcticdb/entity/variant_key.hpp>
@@ -225,7 +226,38 @@ class S3StorageFixture : public testing::Test {
225226
S3Storage store;
226227
};
227228

228-
TEST_F(S3StorageFixture, test_key_exists){
229+
arcticdb::storage::nfs_backed::NfsBackedStorage::Config get_test_nfs_config() {
230+
arcticdb::storage::nfs_backed::NfsBackedStorage::Config cfg;
231+
cfg.set_use_mock_storage_for_testing(true);
232+
return cfg;
233+
}
234+
235+
class NfsStorageFixture : public testing::Test {
236+
protected:
237+
NfsStorageFixture():
238+
store(LibraryPath("lib", '.'), OpenMode::DELETE, get_test_nfs_config())
239+
{}
240+
241+
arcticdb::storage::nfs_backed::NfsBackedStorage store;
242+
};
243+
244+
class S3AndNfsStorageFixture : public testing::TestWithParam<std::string> {
245+
public:
246+
std::unique_ptr<Storage> get_storage() {
247+
LibraryPath lp{"lib"};
248+
if (GetParam() == "nfs") {
249+
return std::make_unique<arcticdb::storage::nfs_backed::NfsBackedStorage>(
250+
lp, OpenMode::DELETE, get_test_nfs_config());
251+
} else if (GetParam() == "s3") {
252+
return std::make_unique<S3Storage>(
253+
lp, OpenMode::DELETE, S3Settings(get_test_s3_config()));
254+
} else {
255+
util::raise_rte("Unexpected fixture type {}", GetParam());
256+
}
257+
}
258+
};
259+
260+
TEST_F(S3StorageFixture, test_key_exists) {
229261
write_in_store(store, "symbol");
230262

231263
ASSERT_TRUE(exists_in_store(store, "symbol"));
@@ -245,6 +277,23 @@ TEST_F(S3StorageFixture, test_read){
245277
UnexpectedS3ErrorException);
246278
}
247279

280+
TEST_P(S3AndNfsStorageFixture, test_read_missing_key_in_exception){
281+
auto s = get_storage();
282+
auto& store = *s;
283+
284+
try {
285+
read_in_store(store, "snap-not-present", KeyType::SNAPSHOT_REF);
286+
FAIL();
287+
} catch (KeyNotFoundException& e) {
288+
auto keys = e.keys().as_range();
289+
ASSERT_EQ(keys.size(), 1);
290+
const auto& key = keys.at(0);
291+
ASSERT_EQ(variant_key_id(key), StreamId{"snap-not-present"});
292+
}
293+
}
294+
295+
INSTANTIATE_TEST_SUITE_P(S3AndNfs, S3AndNfsStorageFixture, testing::Values("s3", "nfs"));
296+
248297
TEST_F(S3StorageFixture, test_write){
249298
write_in_store(store, "symbol");
250299
ASSERT_THROW(

cpp/arcticdb/version/snapshot.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,9 @@ void iterate_snapshots(const std::shared_ptr<Store>& store, folly::Function<void
107107
visitor(vk);
108108
} catch (storage::KeyNotFoundException& e) {
109109
e.keys().broadcast([&vk, &e](const VariantKey& key) {
110-
if (key != vk) throw storage::KeyNotFoundException(std::move(e.keys()));
110+
if (key != vk) {
111+
throw storage::KeyNotFoundException(std::move(e.keys()));
112+
}
111113
});
112114
ARCTICDB_DEBUG(log::version(), "Ignored exception due to {} being deleted during iterate_snapshots().");
113115
}

python/tests/integration/arcticdb/test_s3.py

+46
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
77
"""
88
import re
9+
import time
10+
from multiprocessing import Queue, Process
911

1012
import pytest
1113
import pandas as pd
@@ -78,6 +80,50 @@ def test_nfs_backed_s3_storage(lib_name, nfs_backed_s3_storage):
7880
assert re.match(bucketized_pattern, o.key), f"Object {o.key} does not match pattern {bucketized_pattern}"
7981

8082

83+
def read_repeatedly(version_store, queue: Queue):
84+
while True:
85+
try:
86+
version_store.list_versions("tst")
87+
except Exception as e:
88+
queue.put(e)
89+
raise
90+
time.sleep(0.1)
91+
92+
93+
def write_repeatedly(version_store):
94+
while True:
95+
for i in range(10):
96+
version_store.snapshot(f"snap-{i}")
97+
for i in range(10):
98+
version_store.delete_snapshot(f"snap-{i}")
99+
time.sleep(0.1)
100+
101+
102+
def test_racing_list_and_delete_nfs(nfs_backed_s3_storage, lib_name):
103+
"""This test is for a regression with NFS where iterating snapshots raced with
104+
deleting them, due to a bug in our logic to suppress the KeyNotFoundException."""
105+
lib = nfs_backed_s3_storage.create_version_store_factory(lib_name)()
106+
lib.write("tst", [1, 2, 3])
107+
108+
exceptions_in_reader = Queue()
109+
reader = Process(target=read_repeatedly, args=(lib, exceptions_in_reader))
110+
writer = Process(target=write_repeatedly, args=(lib,))
111+
112+
try:
113+
reader.start()
114+
writer.start()
115+
116+
# Run test for 2 seconds - this was enough for this regression test to reliably fail
117+
# 10 times in a row.
118+
reader.join(2)
119+
writer.join(0.001)
120+
finally:
121+
writer.terminate()
122+
reader.terminate()
123+
124+
assert exceptions_in_reader.empty()
125+
126+
81127
@pytest.fixture(scope="function", params=[MotoNfsBackedS3StorageFixtureFactory, MotoS3StorageFixtureFactory])
82128
def s3_storage_dots_in_path(request):
83129
prefix = "some_path/.thing_with_a_dot/even.more.dots/end"

0 commit comments

Comments
 (0)