Skip to content

Commit c0e3e53

Browse files
committed
MB-65545: [cbcrypto] Add utility function maybeRewriteFiles
Add a function that iterates over the files in a directory and potentially rewrites them with or without encryption.

Change-Id: I0cbd23273ba86595f2ad7d792cda1d104d2778e9
Reviewed-on: https://review.couchbase.org/c/platform/+/224189
Tested-by: Build Bot <build@couchbase.com>
Reviewed-by: Pavlos Georgiou <pavlos.georgiou@couchbase.com>
1 parent 2d6c2de commit c0e3e53

3 files changed

Lines changed: 248 additions & 32 deletions

File tree

cbcrypto/file_utilities.cc

Lines changed: 98 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,43 @@
99
*/
1010

1111
#include <cbcrypto/encrypted_file_header.h>
#include <cbcrypto/file_reader.h>
#include <cbcrypto/file_utilities.h>
#include <cbcrypto/file_writer.h>
#include <fmt/format.h>
#include <nlohmann/json.hpp>
#include <platform/dirutils.h>

#include <fstream>
#include <vector>
1520

1621
namespace cb::crypto {
1722

23+
static std::string getEncryptionKey(const std::filesystem::path& path) {
24+
std::array<char, sizeof(EncryptedFileHeader)> buffer;
25+
auto size = file_size(path);
26+
if (size < buffer.size()) {
27+
// No header present
28+
return {};
29+
}
30+
31+
std::ifstream input;
32+
input.exceptions(std::ifstream::failbit | std::ifstream::badbit);
33+
input.open(path, std::ios::binary);
34+
input.read(buffer.data(), buffer.size());
35+
input.close();
36+
37+
auto* header = reinterpret_cast<EncryptedFileHeader*>(buffer.data());
38+
if (!header->is_encrypted()) {
39+
throw std::logic_error(
40+
"File with .cef extension does not have correct magic");
41+
}
42+
43+
if (!header->is_supported()) {
44+
throw std::logic_error("File with .cef extension is not supported");
45+
}
46+
return std::string{header->get_id()};
47+
}
48+
1849
std::unordered_set<std::string> findDeksInUse(
1950
const std::filesystem::path& directory,
2051
const std::function<bool(const std::filesystem::path&)>& filefilter,
@@ -28,39 +59,15 @@ std::unordered_set<std::string> findDeksInUse(
2859
continue;
2960
}
3061

31-
std::array<char, sizeof(cb::crypto::EncryptedFileHeader)> buffer;
3262
try {
33-
auto size = file_size(path);
34-
if (size < buffer.size()) {
35-
// No header present
36-
continue;
63+
auto key = getEncryptionKey(path);
64+
if (!key.empty()) {
65+
deks.insert(key);
3766
}
38-
39-
std::ifstream input;
40-
input.exceptions(std::ifstream::failbit | std::ifstream::badbit);
41-
input.open(path, std::ios::binary);
42-
input.read(buffer.data(), buffer.size());
43-
input.close();
4467
} catch (const std::exception& e) {
4568
error("Failed to get deks from",
4669
{{"path", path.string()}, {"error", e.what()}});
47-
continue;
48-
}
49-
50-
auto* header = reinterpret_cast<cb::crypto::EncryptedFileHeader*>(
51-
buffer.data());
52-
if (!header->is_encrypted()) {
53-
error("Logfile with .cef extension does not have correct magic",
54-
{{"path", path.string()}});
55-
continue;
56-
}
57-
58-
if (!header->is_supported()) {
59-
error("Logfile with .cef extension is not supported",
60-
{{"path", path.string()}});
61-
continue;
6270
}
63-
deks.insert(std::string(header->get_id()));
6471
}
6572

6673
if (ec) {
@@ -71,4 +78,68 @@ std::unordered_set<std::string> findDeksInUse(
7178
return deks;
7279
}
7380

81+
void maybeRewriteFiles(
82+
const std::filesystem::path& directory,
83+
const std::function<bool(const std::filesystem::path&,
84+
std::string_view)>& filefilter,
85+
SharedEncryptionKey encryption_key,
86+
const std::function<SharedEncryptionKey(std::string_view)>&
87+
key_lookup_function,
88+
const std::function<void(std::string_view, const nlohmann::json&)>&
89+
error,
90+
std::string_view unencrypted_extension) {
91+
std::error_code ec;
92+
for (const auto& p : std::filesystem::directory_iterator(directory, ec)) {
93+
auto path = p.path();
94+
std::string key;
95+
if (path.extension() == ".cef") {
96+
try {
97+
key = getEncryptionKey(path);
98+
} catch (const std::exception& e) {
99+
error("Failed to get deks from",
100+
{{"path", path.string()}, {"error", e.what()}});
101+
continue;
102+
}
103+
}
104+
105+
if (!filefilter(path, key)) {
106+
continue;
107+
}
108+
109+
auto reader = FileReader::create(path, key_lookup_function);
110+
std::filesystem::path tmpfile = cb::io::mktemp(path.string());
111+
auto writer = FileWriter::create(encryption_key, tmpfile, 64 * 1024);
112+
113+
bool eof = false;
114+
do {
115+
try {
116+
auto message = reader->nextChunk();
117+
if (message.empty()) {
118+
eof = true;
119+
} else {
120+
writer->write(message);
121+
}
122+
} catch (const std::underflow_error&) {
123+
error("Partial chunk detected", {{"path", path.string()}});
124+
eof = true;
125+
}
126+
} while (!eof);
127+
writer->flush();
128+
writer->close();
129+
if (encryption_key && path.extension() != ".cef") {
130+
auto next = path;
131+
next.replace_extension(".cef");
132+
rename(tmpfile, next);
133+
remove(path);
134+
} else if (!encryption_key && path.extension() == ".cef") {
135+
auto next = path;
136+
next.replace_extension(unencrypted_extension);
137+
rename(tmpfile, next);
138+
remove(path);
139+
} else {
140+
rename(tmpfile, path);
141+
}
142+
}
143+
}
144+
74145
} // namespace cb::crypto

cbcrypto/file_utilities_test.cc

Lines changed: 123 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
*/
1010

1111
#include "cbcrypto/file_utilities.h"
12+
#include "cbcrypto/key_store.h"
1213

1314
#include <cbcrypto/common.h>
1415
#include <cbcrypto/file_reader.h>
@@ -25,26 +26,42 @@ using namespace std::string_literals;
2526
class FileUtilitiesTest : public ::testing::Test {
2627
protected:
2728
void SetUp() override {
29+
keystore = {};
30+
keystore.setActiveKey(DataEncryptionKey::generate());
2831
dir = cb::io::mkdtemp("FileUtilitiesTest");
2932
}
3033

3134
void TearDown() override {
3235
remove_all(dir);
3336
}
3437

35-
void create_file(const std::string& name, const std::string& content) {
36-
std::shared_ptr<DataEncryptionKey> key = DataEncryptionKey::generate();
38+
void create_file(const std::string& name,
39+
std::string_view content,
40+
const bool encrypted = true,
41+
const std::size_t max_chunk_size =
42+
std::numeric_limits<std::size_t>::max()) {
43+
std::shared_ptr<DataEncryptionKey> key;
44+
if (encrypted) {
45+
key = DataEncryptionKey::generate();
46+
keystore.add(key);
47+
}
3748
auto writer = FileWriter::create(key, dir / name);
38-
writer->write(content);
49+
while (!content.empty()) {
50+
auto chunk =
51+
content.substr(0, std::min(content.size(), max_chunk_size));
52+
content.remove_prefix(chunk.size());
53+
writer->write(chunk);
54+
}
3955
writer->flush();
4056
writer.reset();
4157

42-
files[name] = key->getId();
58+
files[name] = encrypted ? key->getId() : "";
4359
}
4460

4561
std::filesystem::path dir;
4662

4763
std::unordered_map<std::string, std::string> files;
64+
KeyStore keystore;
4865
};
4966

5067
TEST_F(FileUtilitiesTest, findDeksInUse) {
@@ -84,7 +101,7 @@ TEST_F(FileUtilitiesTest, findDeksInUse) {
84101
bool error = false;
85102
keys = findDeksInUse(
86103
dir / "no-such-directory",
87-
[](const std::filesystem::path& p) { return true; },
104+
[](const std::filesystem::path&) { return true; },
88105
[&error](auto message, const auto& json) {
89106
EXPECT_EQ("Error occurred while traversing directory", message);
90107
EXPECT_TRUE(json.contains("error")) << json.dump();
@@ -93,3 +110,104 @@ TEST_F(FileUtilitiesTest, findDeksInUse) {
93110
EXPECT_TRUE(error) << "Error callback not called";
94111
EXPECT_TRUE(keys.empty());
95112
}
113+
114+
/// A plain text file rewritten with an active key must end up as an
/// encrypted ".cef" file with the same content, and the original file
/// must be gone.
TEST_F(FileUtilitiesTest, rewriteUnencryptedToEncrypted) {
    create_file("file.txt", "This is the content", false);

    const auto lookup = [this](auto id) { return keystore.lookup(id); };
    const auto only_file_txt = [](const auto& path, auto) {
        return path.filename().string() == "file.txt";
    };
    const auto ignore_errors = [](std::string_view, const nlohmann::json&) {};

    maybeRewriteFiles(dir,
                      only_file_txt,
                      keystore.getActiveKey(),
                      lookup,
                      ignore_errors);

    // The source file should have been replaced by its encrypted twin
    EXPECT_FALSE(std::filesystem::exists(dir / "file.txt"));
    const auto encrypted = dir / "file.cef";
    EXPECT_TRUE(std::filesystem::exists(encrypted));

    const auto reader = FileReader::create(encrypted, lookup);
    EXPECT_TRUE(reader->is_encrypted());
    EXPECT_EQ("This is the content", reader->read());
}
136+
137+
/// An encrypted file rewritten without a key must end up as an
/// unencrypted file using the default ".txt" extension, and the ".cef"
/// file must be gone.
TEST_F(FileUtilitiesTest, rewriteEncryptedToUnencrypted) {
    create_file("file.cef", "This is the content");

    const auto lookup = [this](auto id) { return keystore.lookup(id); };
    const auto only_file_cef = [](const auto& path, auto) {
        return path.filename().string() == "file.cef";
    };
    const auto ignore_errors = [](std::string_view, const nlohmann::json&) {};

    // An empty key requests an unencrypted rewrite
    maybeRewriteFiles(dir, only_file_cef, {}, lookup, ignore_errors);

    EXPECT_FALSE(std::filesystem::exists(dir / "file.cef"));
    const auto plain = dir / "file.txt";
    EXPECT_TRUE(std::filesystem::exists(plain));

    const auto reader = FileReader::create(plain, lookup);
    EXPECT_FALSE(reader->is_encrypted());
    EXPECT_EQ("This is the content", reader->read());
}
158+
159+
/// Select the file to rewrite by the id of the key it is encrypted with
/// and verify that it ends up encrypted with the active key instead.
TEST_F(FileUtilitiesTest, rewriteUsingCertainKey) {
    create_file("file1.cef", "This is the content");
    const std::string original_key = files["file1.cef"];

    const auto lookup = [this](auto id) { return keystore.lookup(id); };
    const auto uses_original_key = [&original_key](const auto&, auto id) {
        return original_key == id;
    };
    const auto ignore_errors = [](std::string_view, const nlohmann::json&) {};

    maybeRewriteFiles(dir,
                      uses_original_key,
                      keystore.getActiveKey(),
                      lookup,
                      ignore_errors);

    // Record the key id the reader asks for when opening the new file
    std::string requested;
    const auto recording_lookup = [this, &requested](auto id) {
        requested = id;
        return keystore.lookup(id);
    };
    const auto reader = FileReader::create(dir / "file1.cef", recording_lookup);

    EXPECT_TRUE(reader->is_encrypted());
    EXPECT_EQ("This is the content", reader->read());
    EXPECT_EQ(requested, keystore.getActiveKey()->getId());
}
181+
182+
/// Rewriting a file whose last chunk is incomplete should keep all the
/// complete chunks and drop the partial one.
TEST_F(FileUtilitiesTest, rewriteTruncatedFile) {
    constexpr std::size_t chunk_size = 1024;
    std::string data(1024 * 1024, 'a');
    create_file("file1.cef", data, true, chunk_size);

    const auto filename = dir / "file1.cef";
    // Chop off the tail of the file so that the final chunk is incomplete
    resize_file(filename, file_size(filename) - 512);

    const auto lookup = [this](auto id) { return keystore.lookup(id); };
    const auto only_file1 = [](const auto& path, auto) {
        return path.filename() == "file1.cef";
    };
    const auto ignore_errors = [](std::string_view, const nlohmann::json&) {};

    maybeRewriteFiles(
            dir, only_file1, keystore.getActiveKey(), lookup, ignore_errors);

    // Record the key id the reader asks for when opening the new file
    std::string requested;
    const auto recording_lookup = [this, &requested](auto id) {
        requested = id;
        return keystore.lookup(id);
    };
    const auto reader = FileReader::create(filename, recording_lookup);

    EXPECT_TRUE(reader->is_encrypted());
    const auto content = reader->read();

    // Exactly one chunk worth of data is expected to be missing
    EXPECT_EQ(content.size(), data.size() - chunk_size);
    data.resize(data.size() - chunk_size);
    EXPECT_EQ(data, content);
    EXPECT_EQ(requested, keystore.getActiveKey()->getId());
}

include/cbcrypto/file_utilities.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <unordered_set>
1717

1818
namespace cb::crypto {
19+
struct DataEncryptionKey;
20+
using SharedEncryptionKey = std::shared_ptr<const DataEncryptionKey>;
1921

2022
/**
2123
* Find all the DEKs in use in the specified directory
@@ -31,4 +33,29 @@ std::unordered_set<std::string> findDeksInUse(
3133
const std::function<void(std::string_view, const nlohmann::json&)>&
3234
error);
3335

36+
/**
37+
* Iterate over all files in the specified directory and potentially
38+
* rewrite all files.
39+
*
40+
* @param directory The directory to scan
41+
* @param filefilter a function to filter out files we're not interested in
42+
* (return true to inspect the file, false to skip it)
43+
* @param encryption_key The key to use when rewriting the files (if empty the
44+
* file will be written unencrypted)
45+
* @param key_lookup_function A function used to look up encryption keys from
46+
* the id
47+
* @param error a callback to add log messages when errors occurs
48+
* @param unencrypted_extension The extension to use for unencrypted files
49+
*/
50+
void maybeRewriteFiles(
51+
const std::filesystem::path& directory,
52+
const std::function<bool(const std::filesystem::path&,
53+
std::string_view)>& filefilter,
54+
SharedEncryptionKey encryption_key,
55+
const std::function<SharedEncryptionKey(std::string_view)>&
56+
key_lookup_function,
57+
const std::function<void(std::string_view, const nlohmann::json&)>&
58+
error,
59+
std::string_view unencrypted_extension = ".txt");
60+
3461
} // namespace cb::crypto

0 commit comments

Comments
 (0)