Skip to content

Commit 5b80090

Browse files
authored
Merge pull request #982 from openzim/user_defined_cost_for_caches
Support for user-defined cost in cache classes
2 parents fe520a3 + d28077f commit 5b80090

8 files changed

+1049
-70
lines changed

src/concurrent_cache.h

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
#define ZIM_CONCURRENT_CACHE_H
2323

2424
#include "lrucache.h"
25+
#include "log.h"
2526

27+
#include <chrono>
2628
#include <cstddef>
2729
#include <future>
2830
#include <mutex>
@@ -39,65 +41,119 @@ namespace zim
3941
safe, and, in case of a cache miss, will block until that element becomes
4042
available.
4143
*/
42-
template <typename Key, typename Value>
44+
template <typename Key, typename Value, typename CostEstimation>
4345
class ConcurrentCache
4446
{
4547
private: // types
4648
typedef std::shared_future<Value> ValuePlaceholder;
47-
typedef lru_cache<Key, ValuePlaceholder> Impl;
4849

49-
public: // types
50-
explicit ConcurrentCache(size_t maxEntries)
51-
: impl_(maxEntries)
50+
struct CacheEntry
51+
{
52+
size_t cost = 0;
53+
ValuePlaceholder value;
54+
55+
bool ready() const {
56+
const auto zeroNs = std::chrono::nanoseconds::zero();
57+
return value.wait_for(zeroNs) == std::future_status::ready;
58+
}
59+
};
60+
61+
struct GetCacheEntryCost
62+
{
63+
static size_t cost(const CacheEntry& x) { return x.cost; }
64+
};
65+
66+
typedef lru_cache<Key, CacheEntry, GetCacheEntryCost> Impl;
67+
68+
public: // functions
69+
explicit ConcurrentCache(size_t maxCost)
70+
: impl_(maxCost)
5271
{}
5372

5473
// Gets the entry corresponding to the given key. If the entry is not in the
5574
// cache, it is obtained by calling f() (without any arguments) and the
5675
// result is put into the cache.
5776
//
5877
// The cache as a whole is locked only for the duration of accessing
59-
// the respective slot. If, in the case of the a cache miss, the generation
78+
// the respective slot. If, in the case of a cache miss, the generation
6079
// of the missing element takes a long time, only attempts to access that
6180
// element will block - the rest of the cache remains open to concurrent
6281
// access.
6382
template<class F>
6483
Value getOrPut(const Key& key, F f)
6584
{
85+
log_debug_func_call("ConcurrentCache::getOrPut", key);
86+
6687
std::promise<Value> valuePromise;
67-
std::unique_lock<std::mutex> l(lock_);
68-
const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
69-
l.unlock();
88+
const auto x = getCacheSlot(key, valuePromise.get_future().share());
89+
CacheEntry cacheEntry(x.value());
90+
log_debug("Obtained the cache slot");
7091
if ( x.miss() ) {
92+
log_debug("It was a cache miss. Going to obtain the value...");
7193
try {
72-
valuePromise.set_value(f());
94+
cacheEntry.cost = materializeValue(valuePromise, f);
95+
finalizeCacheMiss(key, cacheEntry);
96+
log_debug("Done. Cache cost is at " << getCurrentCost() );
7397
} catch (std::exception& e) {
98+
log_debug("Evaluation failed. Releasing the cache slot...");
7499
drop(key);
75100
throw;
76101
}
77102
}
78103

79-
return x.value().get();
104+
log_debug((!cacheEntry.ready() ? "Waiting for result..." : "Returning immediately..."));
105+
return log_debug_return_value(cacheEntry.value.get());
80106
}
81107

82108
bool drop(const Key& key)
83109
{
84-
std::unique_lock<std::mutex> l(lock_);
110+
log_debug_func_call("ConcurrentCache::drop", key);
111+
log_debug_raii_sync_statement(std::unique_lock<std::mutex> l(lock_));
85112
return impl_.drop(key);
86113
}
87114

88-
size_t getMaxSize() const {
115+
size_t getMaxCost() const {
89116
std::unique_lock<std::mutex> l(lock_);
90-
return impl_.getMaxSize();
117+
return impl_.getMaxCost();
91118
}
92119

93-
size_t getCurrentSize() const {
120+
size_t getCurrentCost() const {
94121
std::unique_lock<std::mutex> l(lock_);
95-
return impl_.size();
122+
return impl_.cost();
96123
}
97124

98-
void setMaxSize(size_t newSize) {
99-
std::unique_lock<std::mutex> l(lock_);
100-
return impl_.setMaxSize(newSize);
125+
void setMaxCost(size_t newSize) {
126+
log_debug_func_call("ConcurrentCache::setMaxCost", newSize);
127+
log_debug_raii_sync_statement(std::unique_lock<std::mutex> l(lock_));
128+
return impl_.setMaxCost(newSize);
129+
}
130+
131+
private: // functions
132+
typename Impl::AccessResult getCacheSlot(const Key& key, const ValuePlaceholder& v)
133+
{
134+
log_debug_func_call("ConcurrentCache::getCacheSlot", key);
135+
log_debug_raii_sync_statement(std::unique_lock<std::mutex> l(lock_));
136+
return impl_.getOrPut(key, CacheEntry{0, v});
137+
}
138+
139+
template<class F>
140+
static size_t materializeValue(std::promise<Value>& valuePromise, F f)
141+
{
142+
const auto materializedValue = f();
143+
log_debug("Value was successfully obtained.");
144+
valuePromise.set_value(materializedValue);
145+
log_debug("Made the value available for concurrent access.");
146+
log_debug("Computing the cost of the new entry...");
147+
auto cost = CostEstimation::cost(materializedValue);
148+
log_debug("cost=" << cost);
149+
return cost;
150+
}
151+
152+
void finalizeCacheMiss(const Key& key, const CacheEntry& cacheEntry)
153+
{
154+
log_debug_func_call("ConcurrentCache::finalizeCacheMiss", key);
155+
log_debug_raii_sync_statement(std::unique_lock<std::mutex> l(lock_));
156+
impl_.put(key, cacheEntry);
101157
}
102158

103159
private: // data

src/dirent_accessor.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,9 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
5555
std::shared_ptr<const Dirent> getDirent(entry_index_t idx) const;
5656
entry_index_t getDirentCount() const { return m_direntCount; }
5757

58-
size_t getMaxCacheSize() const { return m_direntCache.getMaxSize(); }
59-
size_t getCurrentCacheSize() const { return m_direntCache.size(); }
60-
void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxSize(nbDirents); }
58+
size_t getMaxCacheSize() const { return m_direntCache.getMaxCost(); }
59+
size_t getCurrentCacheSize() const { return m_direntCache.cost(); }
60+
void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxCost(nbDirents); }
6161

6262
private: // functions
6363
std::shared_ptr<const Dirent> readDirent(offset_t) const;
@@ -67,7 +67,7 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor
6767
std::unique_ptr<const Reader> mp_pathPtrReader;
6868
entry_index_t m_direntCount;
6969

70-
mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>> m_direntCache;
70+
mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>, UnitCostEstimation> m_direntCache;
7171
mutable std::mutex m_direntCacheLock;
7272

7373
mutable std::vector<char> m_bufferDirentZone;

src/fileimpl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -805,13 +805,13 @@ bool checkTitleListing(const IndirectDirentAccessor& accessor, entry_index_type
805805

806806

807807
size_t FileImpl::getClusterCacheMaxSize() const {
808-
return clusterCache.getMaxSize();
808+
return clusterCache.getMaxCost();
809809
}
810810
size_t FileImpl::getClusterCacheCurrentSize() const {
811-
return clusterCache.getCurrentSize();
811+
return clusterCache.getCurrentCost();
812812
}
813813
// Sets a new cost limit for the cluster cache; nbClusters is passed
// straight to ConcurrentCache::setMaxCost().
void FileImpl::setClusterCacheMaxSize(size_t nbClusters)
{
  clusterCache.setMaxCost(nbClusters);
}
816816

817817
size_t FileImpl::getDirentCacheMaxSize() const {

src/fileimpl.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "file_compound.h"
3838
#include "fileheader.h"
3939
#include "zim/archive.h"
40+
#include "lrucache.h"
4041
#include "zim_types.h"
4142
#include "direntreader.h"
4243

@@ -59,7 +60,7 @@ namespace zim
5960
std::unique_ptr<const IndirectDirentAccessor> mp_titleDirentAccessor;
6061

6162
typedef std::shared_ptr<const Cluster> ClusterHandle;
62-
mutable ConcurrentCache<cluster_index_type, ClusterHandle> clusterCache;
63+
mutable ConcurrentCache<cluster_index_type, ClusterHandle, UnitCostEstimation> clusterCache;
6364

6465
const bool m_hasFrontArticlesIndex;
6566
const entry_index_t m_startUserEntry;

src/log.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
*
1818
*/
1919

20+
#ifndef ZIM_LOG_H
21+
#define ZIM_LOG_H
22+
2023
#include "config.h"
2124

2225
// Should we keep the dependence on cxxtools logging framework?
@@ -206,3 +209,5 @@ namespace LoggingImpl
206209
#define log_init()
207210
208211
#endif
212+
213+
#endif // ZIM_LOG_H

0 commit comments

Comments
 (0)