Skip to content

Commit 4f4a99a

Browse files
committed
Add hybrid disk/memory caching with Foyer.
1 parent f560be7 commit 4f4a99a

File tree

14 files changed

+1444
-192
lines changed

14 files changed

+1444
-192
lines changed

Cargo.lock

Lines changed: 824 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,17 @@ regex = "1"
3535
csv = "1"
3636
base64 = "0.22"
3737
rust-stemmers = "1.2"
38+
foyer = "0.21"
39+
bytes = "1"
40+
bincode = "1"
41+
rmp-serde = "1"
42+
md5 = "0.8"
43+
mimalloc = { version = "0.1", default-features = false }
44+
45+
[features]
46+
default = []
47+
# Use mimalloc for musl builds (musl's malloc is very slow)
48+
musl = []
3849

3950
[profile.release]
4051
strip = true

config.sample.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,28 @@ dicts = [["english", "english"]]
4141
max_conns = 100
4242

4343

44+
[cache]
45+
# Cache public search results to speed up repeated queries. On large dictionary
46+
# databases that get a lot of public traffic, enabling caching can significantly
47+
# reduce the load on the database and speed up response times.
48+
enabled = false
49+
50+
# How long cached entries should live (e.g., "72h", "30m", "1d").
51+
ttl = "72h"
52+
53+
# Cache mode: "memory" (RAM only) or "hybrid" (RAM + disk).
54+
mode = "hybrid"
55+
56+
# Maximum memory (in MB) for the in-memory cache.
57+
max_memory_mb = 128
58+
59+
# Maximum disk size (in MB) for the disk cache (hybrid mode only).
60+
max_disk_mb = 512
61+
62+
# Directory for disk cache storage (hybrid mode only).
63+
dir = "/tmp/dictpress-cache"
64+
65+
4466
[api_results]
4567
# Default number of entries to return per page when paginated.
4668
per_page = 10

docs/static/logo.svg

Lines changed: 60 additions & 126 deletions
Loading

docs/static/style.css

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ html, body {
1616

1717
body {
1818
background: #fff;
19-
font-family: Inter, sans-serif;
19+
font-family: sans-serif;
2020
font-size: 17px;
2121
line-height: 29px;
2222
color: var(--secondary);

src/cache.rs

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
use std::time::{Duration, SystemTime, UNIX_EPOCH};
2+
3+
use bytes::{BufMut, Bytes, BytesMut};
4+
use foyer::{
5+
BlockEngineBuilder, Cache as FoyerCache, CacheBuilder, Compression, DeviceBuilder,
6+
FsDeviceBuilder, HybridCache, HybridCacheBuilder, RecoverMode,
7+
};
8+
use serde::Deserialize;
9+
use thiserror::Error;
10+
11+
use crate::models::SearchQuery;
12+
13+
const MODE_MEMORY: &str = "memory";
14+
const MODE_HYBRID: &str = "hybrid";
15+
16+
/// Size of TTL prefix (u64 timestamp).
17+
const TTL_PREFIX_SIZE: usize = 8;
18+
19+
/// Cache configuration.
20+
#[derive(Debug, Clone, Deserialize)]
21+
pub struct CacheConfig {
22+
#[serde(default)]
23+
pub enabled: bool,
24+
25+
/// TTL duration string (e.g., "72h", "30m", "1d").
26+
#[serde(default = "default_cache_ttl")]
27+
pub ttl: String,
28+
29+
/// Cache mode: "memory" or "hybrid".
30+
#[serde(default = "default_cache_mode")]
31+
pub mode: String,
32+
33+
/// Maximum memory in MB for in-memory cache.
34+
#[serde(default = "default_cache_memory")]
35+
pub max_memory_mb: u64,
36+
37+
/// Maximum disk size in MB for hybrid mode.
38+
#[serde(default = "default_cache_disk")]
39+
pub max_disk_mb: u64,
40+
41+
/// Directory for disk cache (hybrid mode only).
42+
#[serde(default = "default_cache_dir")]
43+
pub dir: String,
44+
}
45+
46+
fn default_cache_ttl() -> String {
47+
"72h".to_string()
48+
}
49+
50+
fn default_cache_mode() -> String {
51+
"hybrid".to_string()
52+
}
53+
54+
fn default_cache_memory() -> u64 {
55+
128
56+
}
57+
58+
fn default_cache_disk() -> u64 {
59+
512
60+
}
61+
62+
fn default_cache_dir() -> String {
63+
"/tmp/dictpress-cache".to_string()
64+
}
65+
66+
impl Default for CacheConfig {
67+
fn default() -> Self {
68+
Self {
69+
enabled: false,
70+
ttl: default_cache_ttl(),
71+
mode: default_cache_mode(),
72+
max_memory_mb: default_cache_memory(),
73+
max_disk_mb: default_cache_disk(),
74+
dir: default_cache_dir(),
75+
}
76+
}
77+
}
78+
79+
#[derive(Debug, Error)]
80+
pub enum CacheError {
81+
#[error("cache build error: {0}")]
82+
Build(String),
83+
84+
#[error("invalid TTL format: {0}")]
85+
InvalidTtl(String),
86+
87+
#[error("invalid cache mode: {0}")]
88+
InvalidMode(String),
89+
}
90+
91+
/// Cache backend abstraction.
92+
enum CacheBackend {
93+
Memory(FoyerCache<String, Bytes>),
94+
Hybrid(HybridCache<String, Bytes>),
95+
}
96+
97+
/// Cache wrapper with TTL support.
98+
pub struct Cache {
99+
backend: CacheBackend,
100+
ttl: Duration,
101+
}
102+
103+
impl Cache {
104+
/// Create a new cache instance.
105+
pub async fn new(cfg: &CacheConfig) -> Result<Self, CacheError> {
106+
let ttl = parse_duration(&cfg.ttl)?;
107+
let memory_bytes = (cfg.max_memory_mb * 1024 * 1024) as usize;
108+
109+
let backend = match cfg.mode.as_str() {
110+
MODE_MEMORY => {
111+
let cache = CacheBuilder::new(memory_bytes)
112+
.with_weighter(|_key, value: &Bytes| value.len())
113+
.build();
114+
CacheBackend::Memory(cache)
115+
}
116+
117+
MODE_HYBRID => {
118+
let disk_bytes = (cfg.max_disk_mb * 1024 * 1024) as usize;
119+
120+
// Build filesystem device.
121+
let device = FsDeviceBuilder::new(&cfg.dir)
122+
.with_capacity(disk_bytes)
123+
.build()
124+
.map_err(|e| CacheError::Build(e.to_string()))?;
125+
126+
// Build hybrid cache.
127+
let cache = HybridCacheBuilder::new()
128+
.memory(memory_bytes)
129+
.storage()
130+
.with_compression(Compression::None)
131+
.with_engine_config(BlockEngineBuilder::new(device))
132+
.with_recover_mode(RecoverMode::Quiet)
133+
.build()
134+
.await
135+
.map_err(|e| CacheError::Build(e.to_string()))?;
136+
137+
CacheBackend::Hybrid(cache)
138+
}
139+
_ => return Err(CacheError::InvalidMode(cfg.mode.clone())),
140+
};
141+
142+
Ok(Self { backend, ttl })
143+
}
144+
145+
/// Get a value from the cache. Returns None if not found or expired.
146+
pub async fn get(&self, key: &str) -> Option<Bytes> {
147+
let raw = match &self.backend {
148+
CacheBackend::Memory(c) => c.get(key).map(|e| e.value().clone()),
149+
CacheBackend::Hybrid(c) => match c.get(key).await {
150+
Ok(Some(entry)) => Some(entry.value().clone()),
151+
Ok(None) => None,
152+
Err(e) => {
153+
log::warn!("cache get (hybrid) key={}: error: {}", key, e);
154+
None
155+
}
156+
},
157+
}?;
158+
159+
// Need at least TTL prefix.
160+
if raw.len() < TTL_PREFIX_SIZE {
161+
return None;
162+
}
163+
164+
// Read TTL from first 8 bytes.
165+
let created_at = u64::from_le_bytes(raw[..TTL_PREFIX_SIZE].try_into().ok()?);
166+
167+
let now = SystemTime::now()
168+
.duration_since(UNIX_EPOCH)
169+
.unwrap_or_default()
170+
.as_secs();
171+
172+
if now.saturating_sub(created_at) > self.ttl.as_secs() {
173+
return None;
174+
}
175+
176+
// Return data slice.
177+
Some(raw.slice(TTL_PREFIX_SIZE..))
178+
}
179+
180+
/// Store a value in the cache with current timestamp prefix.
181+
pub fn put(&self, key: &str, value: &[u8]) {
182+
let now = SystemTime::now()
183+
.duration_since(UNIX_EPOCH)
184+
.unwrap_or_default()
185+
.as_secs();
186+
187+
// Prepend TTL timestamp (first 8 bytes).
188+
let mut buf = BytesMut::with_capacity(TTL_PREFIX_SIZE + value.len());
189+
buf.put_u64_le(now);
190+
buf.extend_from_slice(value);
191+
let data = buf.freeze();
192+
193+
match &self.backend {
194+
CacheBackend::Memory(c) => {
195+
c.insert(key.to_string(), data);
196+
}
197+
CacheBackend::Hybrid(c) => {
198+
c.insert(key.to_string(), data);
199+
}
200+
}
201+
}
202+
203+
/// Close the cache and flush pending writes.
204+
pub async fn close(&self) {
205+
if let CacheBackend::Hybrid(c) = &self.backend {
206+
c.close().await.ok();
207+
}
208+
}
209+
}
210+
211+
/// Generate a cache key for search queries.
212+
/// Matches the Go implementation's makeQueryCacheKey().
213+
pub fn make_search_cache_key(q: &SearchQuery) -> String {
214+
let mut types = q.types.clone();
215+
types.sort();
216+
let mut tags = q.tags.clone();
217+
tags.sort();
218+
219+
let key = format!(
220+
"s:{}:{}:{}:{}:{}:{}:{}:{}",
221+
q.from_lang,
222+
q.to_lang,
223+
q.query.to_lowercase().trim(),
224+
types.join(","),
225+
tags.join(","),
226+
q.status,
227+
q.page,
228+
q.per_page
229+
);
230+
231+
let digest = md5::compute(key.as_bytes());
232+
format!("s:{:x}", digest)
233+
}
234+
235+
/// Generate a cache key for glossary queries.
236+
/// Matches the Go implementation's makeGlossaryCacheKey().
237+
pub fn make_glossary_cache_key(lang: &str, initial: &str, offset: i32, limit: i32) -> String {
238+
let key = format!("g:{}:{}:{}:{}", lang, initial, offset, limit);
239+
let digest = md5::compute(key.as_bytes());
240+
format!("g:{:x}", digest)
241+
}
242+
243+
/// Parse a duration string like "72h", "30m", "1d" into Duration.
244+
fn parse_duration(s: &str) -> Result<Duration, CacheError> {
245+
let s = s.trim();
246+
if s.is_empty() {
247+
return Err(CacheError::InvalidTtl("empty duration".to_string()));
248+
}
249+
250+
let (num_str, unit) = s.split_at(s.len() - 1);
251+
let num: u64 = num_str
252+
.parse()
253+
.map_err(|_| CacheError::InvalidTtl(s.to_string()))?;
254+
255+
let secs = match unit {
256+
"s" => num,
257+
"m" => num * 60,
258+
"h" => num * 3600,
259+
"d" => num * 86400,
260+
_ => return Err(CacheError::InvalidTtl(s.to_string())),
261+
};
262+
263+
Ok(Duration::from_secs(secs))
264+
}

src/handlers/entries.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use axum::{
77
};
88

99
use super::{json, ApiErr, ApiResp, Ctx, Result};
10-
use crate::models::{Entry, RelationsQuery};
10+
use crate::models::{Entry, JsonString, RelationsQuery};
1111

1212
/// Entry creation/update request.
1313
#[derive(Debug, serde::Deserialize)]
@@ -28,7 +28,7 @@ pub struct EntryReq {
2828
#[serde(default)]
2929
pub notes: String,
3030
#[serde(default)]
31-
pub meta: serde_json::Value,
31+
pub meta: JsonString,
3232
#[serde(default)]
3333
pub status: String,
3434
}

src/handlers/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use serde::Serialize;
1717
use tera::Tera;
1818

1919
use crate::{
20+
cache::Cache,
2021
manager::Manager,
2122
models::{Dicts, LangMap},
2223
};
@@ -28,6 +29,7 @@ pub struct Ctx {
2829
pub mgr: Arc<Manager>,
2930
pub langs: LangMap,
3031
pub dicts: Dicts,
32+
pub cache: Option<Arc<Cache>>,
3133

3234
/// Admin templates (always loaded, embedded in binary).
3335
pub admin_tpl: Arc<Tera>,

0 commit comments

Comments
 (0)