Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
04482d4
Experimenting with binary cache
kuznetsss Oct 29, 2025
e33c428
Add cache.local_file options
kuznetsss Oct 31, 2025
8d4427e
WIP: Rewriting cache reading writing
kuznetsss Oct 31, 2025
be922ef
Add streaming shasum class
kuznetsss Nov 3, 2025
2e39083
Fix bugs, add sha256 hashsum
kuznetsss Nov 3, 2025
f2d4f89
Add tests for InputFile
kuznetsss Nov 3, 2025
56cf901
Add tests for OutputFile
kuznetsss Nov 3, 2025
58a9365
Move implementation to cpp files
kuznetsss Nov 3, 2025
4cce68d
Add tests for LedgerCacheFile
kuznetsss Nov 4, 2025
efa12e0
Add save/load code to LedgerCache
kuznetsss Nov 4, 2025
525e0b5
Move LedgerCacheFile implementation into cpp file
kuznetsss Nov 4, 2025
fca246f
Remove buffer and compression options
kuznetsss Nov 5, 2025
58ba86a
Added hash() method to input and output files
kuznetsss Nov 5, 2025
3f7b22d
Change cache file to use one hash for the whole file
kuznetsss Nov 5, 2025
cf2fa1c
WIP: save/load chache on clio start/stop
kuznetsss Nov 6, 2025
35bcb86
Save and load ledger cache in clio
kuznetsss Nov 7, 2025
f6dbe4e
Add more tests
kuznetsss Nov 7, 2025
5dce1f5
Merge remote-tracking branch 'upstream/develop' into 2413_Read_write_…
kuznetsss Nov 7, 2025
8ecf622
Fix after merge
kuznetsss Nov 7, 2025
47a2daa
Merge branch 'develop' into 2413_Read_write_cache_from_disk
kuznetsss Nov 7, 2025
b1b4d27
Run pre-commit
kuznetsss Nov 10, 2025
b631b72
Add missing docs
kuznetsss Nov 10, 2025
6155d2c
Fix build
kuznetsss Nov 10, 2025
f12a0c2
Merge branch 'develop' into 2413_Read_write_cache_from_disk
kuznetsss Nov 10, 2025
03d974f
Remove timestamp and add sequence verification
kuznetsss Nov 10, 2025
17ce255
Save cache file with prefix new
kuznetsss Nov 10, 2025
c22fa4a
Revert precommit changes
kuznetsss Nov 10, 2025
a4749a3
Merge branch 'develop' into 2413_Read_write_cache_from_disk
kuznetsss Nov 10, 2025
5cad274
Fix docs
kuznetsss Nov 10, 2025
63ea5c3
Fix config key
kuznetsss Nov 10, 2025
81edb2a
Fix review comments
kuznetsss Nov 11, 2025
ae5b54f
Update config description
kuznetsss Nov 12, 2025
60e0a6d
Fix review comments
kuznetsss Nov 13, 2025
2651b9c
Merge branch 'develop' into 2413_Read_write_cache_from_disk
kuznetsss Nov 13, 2025
cec4b44
Fix typo
kuznetsss Nov 13, 2025
b5567bc
Merge branch 'develop' into 2413_Read_write_cache_from_disk
kuznetsss Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/config-description.md
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,22 @@ This document provides a list of all available Clio configuration properties in
- **Constraints**: The value must be one of the following: `sync`, `async`, `none`.
- **Description**: The strategy used for Cache loading.

### cache.file.path

- **Required**: False
- **Type**: string
- **Default value**: None
- **Constraints**: None
- **Description**: The path to a file where cache will be saved to on shutdown and loaded from on startup. If the file couldn't be read Clio will load cache as usual (from DB or from rippled).

### cache.file.max_sequence_age

- **Required**: True
- **Type**: int
- **Default value**: `5000`
- **Constraints**: None
- **Description**: Max allowed difference between the latest sequence in DB and in cache file. If the cache file is too old (contains too low latest sequence) Clio will reject using it.

### log.channels.[].channel

- **Required**: False
Expand Down
6 changes: 5 additions & 1 deletion docs/examples/config/example-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,11 @@
// "num_cursors_from_account": 3200, // Read the cursors from the account table until we have enough cursors to partition the ledger to load concurrently.
"num_markers": 48, // The number of markers is the number of coroutines to load the cache concurrently.
"page_fetch_size": 512, // The number of rows to load for each page.
"load": "async" // "sync" to load cache synchronously or "async" to load cache asynchronously or "none"/"no" to turn off the cache.
"load": "async", // "sync" to load cache synchronously or "async" to load cache asynchronously or "none"/"no" to turn off the cache.
"file": {
"path": "./cache.bin",
"max_sequence_age": 5000
}
},
"prometheus": {
"enabled": true,
Expand Down
17 changes: 17 additions & 0 deletions src/app/ClioApplication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <cstdlib>
#include <memory>
#include <optional>
#include <string>
#include <thread>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -110,7 +111,23 @@ ClioApplication::run(bool const useNgWebServer)
auto const dosguardWeights = web::dosguard::Weights::make(config_);
auto dosGuard = web::dosguard::DOSGuard{config_, whitelistHandler, dosguardWeights};
auto sweepHandler = web::dosguard::IntervalSweepHandler{config_, ioc, dosGuard};

auto cache = data::LedgerCache{};
appStopper_.setOnStop([&cache, this](auto&&) {
// TODO(kuznetsss): move this into Stopper::makeOnStopCallback()
auto const cacheFilePath = config_.maybeValue<std::string>("cache.file.path");
if (not cacheFilePath.has_value()) {
return;
}

LOG(util::LogService::info()) << "Saving ledger cache to " << *cacheFilePath;
if (auto const [success, duration_ms] = util::timed([&]() { return cache.saveToFile(*cacheFilePath); });
success.has_value()) {
LOG(util::LogService::info()) << "Successfully saved ledger cache in " << duration_ms << " ms";
} else {
LOG(util::LogService::error()) << "Error saving LedgerCache to file";
}
});

// Interface to the database
auto backend = data::makeBackend(config_, cache);
Expand Down
25 changes: 19 additions & 6 deletions src/data/BackendInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ BackendInterface::updateRange(uint32_t newMax)
{
std::scoped_lock const lck(rngMtx_);

if (range_.has_value() && newMax < range_->maxSequence) {
if (range_.has_value() and newMax < range_->maxSequence) {
ASSERT(
false,
"Range shouldn't exist yet or newMax should be at least range->maxSequence. newMax = {}, "
Expand All @@ -280,11 +280,14 @@ BackendInterface::updateRange(uint32_t newMax)
);
}

if (!range_.has_value()) {
range_ = {.minSequence = newMax, .maxSequence = newMax};
} else {
range_->maxSequence = newMax;
}
updateRangeImpl(newMax);
}

void
BackendInterface::forceUpdateRange(uint32_t newMax)
{
std::scoped_lock const lck(rngMtx_);
updateRangeImpl(newMax);
}

void
Expand Down Expand Up @@ -410,4 +413,14 @@ BackendInterface::fetchFees(std::uint32_t const seq, boost::asio::yield_context
return fees;
}

void
BackendInterface::updateRangeImpl(uint32_t newMax)
{
if (!range_.has_value()) {
range_ = {.minSequence = newMax, .maxSequence = newMax};
} else {
range_->maxSequence = newMax;
}
}

} // namespace data
12 changes: 12 additions & 0 deletions src/data/BackendInterface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,15 @@ class BackendInterface {
void
updateRange(uint32_t newMax);

/**
* @brief Updates the range of sequences that are stored in the DB without any checks
* @note In the most cases you should use updateRange() instead
*
* @param newMax The new maximum sequence available
*/
void
forceUpdateRange(uint32_t newMax);

/**
* @brief Sets the range of sequences that are stored in the DB.
*
Expand Down Expand Up @@ -776,6 +785,9 @@ class BackendInterface {
*/
virtual bool
doFinishWrites() = 0;

void
updateRangeImpl(uint32_t newMax);
};

} // namespace data
Expand Down
3 changes: 3 additions & 0 deletions src/data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ target_sources(
cassandra/impl/SslContext.cpp
cassandra/Handle.cpp
cassandra/SettingsProvider.cpp
impl/InputFile.cpp
impl/LedgerCacheFile.cpp
impl/OutputFile.cpp
)

target_link_libraries(clio_data PUBLIC cassandra-cpp-driver::cassandra-cpp-driver clio_util)
36 changes: 36 additions & 0 deletions src/data/LedgerCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,22 @@
#include "data/LedgerCache.hpp"

#include "data/Types.hpp"
#include "data/impl/LedgerCacheFile.hpp"
#include "etl/Models.hpp"
#include "util/Assert.hpp"

#include <xrpl/basics/base_uint.h>

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <map>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <string>
#include <utility>
#include <vector>

namespace data {
Expand Down Expand Up @@ -251,4 +257,34 @@ LedgerCache::getSuccessorHitRate() const
return static_cast<float>(successorHitCounter_.get().value()) / successorReqCounter_.get().value();
}

std::expected<void, std::string>
LedgerCache::saveToFile(std::string const& path) const
{
if (not isFull()) {
return std::unexpected{"Ledger cache is not full"};
}

impl::LedgerCacheFile file{path};
std::unique_lock lock{mtx_};
impl::LedgerCacheFile::DataView data{.latestSeq = latestSeq_, .map = map_, .deleted = deleted_};
return file.write(data);
}

std::expected<void, std::string>
LedgerCache::loadFromFile(std::string const& path, uint32_t minLatestSequence)
{
impl::LedgerCacheFile file{path};
auto data = file.read(minLatestSequence);
if (not data.has_value()) {
return std::unexpected(std::move(data).error());
}
auto [latestSeq, map, deleted] = std::move(data).value();
std::unique_lock lock{mtx_};
latestSeq_ = latestSeq;
map_ = std::move(map);
deleted_ = std::move(deleted);
full_ = true;
return {};
}

} // namespace data
23 changes: 21 additions & 2 deletions src/data/LedgerCache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <map>
#include <optional>
#include <shared_mutex>
#include <string>
#include <unordered_set>
#include <vector>

Expand All @@ -46,11 +47,16 @@ namespace data {
* @brief Cache for an entire ledger.
*/
class LedgerCache : public LedgerCacheInterface {
public:
/** @brief An entry of the cache */
struct CacheEntry {
uint32_t seq = 0;
Blob blob;
};

using CacheMap = std::map<ripple::uint256, CacheEntry>;

private:
// counters for fetchLedgerObject(s) hit rate
std::reference_wrapper<util::prometheus::CounterInt> objectReqCounter_{PrometheusService::counterInt(
"ledger_cache_counter_total_number",
Expand All @@ -73,8 +79,8 @@ class LedgerCache : public LedgerCacheInterface {
util::prometheus::Labels({{"type", "cache_hit"}, {"fetch", "successor_key"}})
)};

std::map<ripple::uint256, CacheEntry> map_;
std::map<ripple::uint256, CacheEntry> deleted_;
CacheMap map_;
CacheMap deleted_;

mutable std::shared_mutex mtx_;
std::condition_variable_any cv_;
Expand Down Expand Up @@ -138,6 +144,19 @@ class LedgerCache : public LedgerCacheInterface {

void
waitUntilCacheContainsSeq(uint32_t seq) override;

/**
* @brief Save the cache to file
* @note This operation takes about 7 seconds and it keeps mtx_ exclusively locked
*
* @param path The file path to save the cache to
* @return An error as a string if any
*/
std::expected<void, std::string>
saveToFile(std::string const& path) const;

std::expected<void, std::string>
loadFromFile(std::string const& path, uint32_t minLatestSequence) override;
};

} // namespace data
13 changes: 13 additions & 0 deletions src/data/LedgerCacheInterface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@

#include <cstddef>
#include <cstdint>
#include <expected>
#include <optional>
#include <string>
#include <vector>

namespace data {
Expand Down Expand Up @@ -168,6 +170,17 @@ class LedgerCacheInterface {
*/
virtual void
waitUntilCacheContainsSeq(uint32_t seq) = 0;

/**
* @brief Load the cache from file
* @note This operation takes about 7 seconds and it keeps mtx_ exclusively locked
*
* @param path The file path to load data from
* @param minLatestSequence The minimum allowed value of the latestLedgerSequence in cache file
* @return An error as a string if any
*/
[[nodiscard]] virtual std::expected<void, std::string>
loadFromFile(std::string const& path, uint32_t minLatestSequence) = 0;
};

} // namespace data
3 changes: 3 additions & 0 deletions src/data/Types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ struct MPTHoldersAndCursor {
struct LedgerRange {
std::uint32_t minSequence = 0;
std::uint32_t maxSequence = 0;

bool
operator==(LedgerRange const&) const = default;
};

/**
Expand Down
58 changes: 58 additions & 0 deletions src/data/impl/InputFile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2025, the clio developers.

Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================

#include "data/impl/InputFile.hpp"

#include <xrpl/basics/base_uint.h>

#include <cstddef>
#include <cstring>
#include <ios>
#include <iosfwd>
#include <string>
#include <utility>

namespace data::impl {

InputFile::InputFile(std::string const& path) : file_(path, std::ios::binary | std::ios::in)
{
}

bool
InputFile::isOpen() const
{
return file_.is_open();
}

bool
InputFile::readRaw(char* data, size_t size)
{
file_.read(data, size);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

null check before reading?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will crash anyway if nullptr is passed here. Or do you mean something else?

shasum_.update(data, size);
return not file_.fail();
}

ripple::uint256
InputFile::hash() const
{
auto sum = shasum_;
return std::move(sum).finalize();
}

} // namespace data::impl
Loading
Loading