Skip to content

Commit b3c383c

Browse files
committed
refactor(core): migrate lang_analyzer cache from bincode to postcard
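Replace bincode 1.x (RUSTSEC-2025-0141 — permanently unmaintained) with postcard 1.x at its three use sites in mlxcel-core's lang_analyzer module: the deserialize and serialize calls in cache.rs and the error variant in mod.rs. postcard 1.x is stable with a frozen wire format, is serde-based (so the API is a near drop-in replacement), and is the de facto standard in the embedded Rust ecosystem — a natural fit for an inference runtime crate. Changed files: Cargo.toml (bincode 1 → postcard 1 with the `alloc` feature), Cargo.lock (regenerated), cache.rs (bincode::deserialize → postcard::from_bytes, bincode::serialize → postcard::to_allocvec, doc comments and test fixture string updated), mod.rs (Bincode error variant → Postcard, module doc updated). Note: cache files written by the previous bincode-based build are not in postcard's wire format; they are expected to go through the existing decode-failure path (renamed to `*.broken.<epoch>.bak`) or the version-mismatch path and be rebuilt — worth confirming, since the cache version constant is not bumped in this commit's visible hunks.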
1 parent db35c68 commit b3c383c

4 files changed

Lines changed: 101 additions & 20 deletions

File tree

Cargo.lock

Lines changed: 91 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/lib/mlxcel-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ smallvec = { version = "1.13", features = ["serde"] }
2323
sha2 = "0.11"
2424
thiserror = "2.0"
2525
tokenizers = "0.22.2"
26-
bincode = "1"
26+
postcard = { version = "1", features = ["alloc"] }
2727
dirs = "6"
2828
libm = "0.2"
2929
tracing = "0.1"

src/lib/mlxcel-core/src/lang_analyzer/cache.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
//! Disk cache for `TokenLanguageIndex` (B4 — vocab-hash keyed, bincode v1).
15+
//! Disk cache for `TokenLanguageIndex` (B4 — vocab-hash keyed, postcard 1.x).
1616
//!
1717
//! # Cache key
1818
//! `vocab_hash = hex(sha256(tokenizer.json bytes))[..16]`
@@ -24,7 +24,7 @@
2424
//! - File missing → build and write.
2525
//! - `version` field mismatch → rebuild and overwrite.
2626
//! - `--lang-bias-rebuild-cache` / `rebuild: bool` → force rebuild.
27-
//! - Corrupted bincode → rename to `*.broken.<epoch>.bak` then rebuild.
27+
//! - Corrupted postcard data → rename to `*.broken.<epoch>.bak` then rebuild.
2828
2929
use std::path::PathBuf;
3030

@@ -69,15 +69,15 @@ pub fn cache_path(vocab_hash: &str) -> PathBuf {
6969
/// On a version mismatch the corrupted/stale file is left in place (the
7070
/// caller will overwrite it via [`save`]).
7171
///
72-
/// On a **bincode decode failure** the corrupt file is renamed to
72+
/// On a **postcard decode failure** the corrupt file is renamed to
7373
/// `<original>.broken.<epoch_secs>.bak` before returning `None`, so the
7474
/// caller can build fresh without worrying about re-encountering the same
7575
/// corrupt bytes.
7676
pub fn try_load(vocab_hash: &str) -> Option<TokenLanguageIndex> {
7777
let path = cache_path(vocab_hash);
7878
let bytes = std::fs::read(&path).ok()?;
7979

80-
match bincode::deserialize::<TokenLanguageIndex>(&bytes) {
80+
match postcard::from_bytes::<TokenLanguageIndex>(&bytes) {
8181
Ok(idx) if idx.version == CURRENT_VERSION => Some(idx),
8282
Ok(_) => {
8383
// Version mismatch — stale cache. Leave the file; the caller will
@@ -121,7 +121,7 @@ pub fn save(index: &TokenLanguageIndex) -> Result<(), LangAnalyzerError> {
121121
if let Some(parent) = path.parent() {
122122
std::fs::create_dir_all(parent)?;
123123
}
124-
let bytes = bincode::serialize(index)?;
124+
let bytes = postcard::to_allocvec(index)?;
125125
// Write to a sibling temp file first to ensure atomicity.
126126
let tmp = path.with_extension("bin.tmp");
127127
std::fs::write(&tmp, &bytes)?;
@@ -364,7 +364,7 @@ mod tests {
364364
std::env::set_var("MLXCEL_CACHE_DIR", tmp.path());
365365
let path = cache_path(hash);
366366
std::fs::create_dir_all(path.parent().unwrap()).expect("create dirs");
367-
std::fs::write(&path, b"not valid bincode data!!!").expect("write garbage");
367+
std::fs::write(&path, b"not valid postcard data!!!").expect("write garbage");
368368
let result = try_load(hash);
369369
let path_still_exists = path.exists();
370370
let cache_dir = path.parent().unwrap().to_path_buf();

src/lib/mlxcel-core/src/lang_analyzer/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
//! The module is structured in sub-issues:
2323
//! - **B2 (this file, initial)**: `Script` enum, `classify_token`, helper predicates.
2424
//! - **B3** (added in the same file): `TokenScriptInfo`, `TokenLanguageIndex`, `build()`.
25-
//! - **B4** (`cache` submodule): disk cache for `TokenLanguageIndex` (vocab-hash keyed, bincode v1).
25+
//! - **B4** (`cache` submodule): disk cache for `TokenLanguageIndex` (vocab-hash keyed, postcard 1.x).
2626
2727
pub mod cache;
2828
pub use cache::{cache_path, load_or_build, save, try_load};
@@ -284,8 +284,8 @@ pub enum LangAnalyzerError {
284284
Io(#[from] std::io::Error),
285285
#[error("tokenizer.json not found at path: {0}")]
286286
TokenizerJsonNotFound(String),
287-
#[error("bincode serialization error: {0}")]
288-
Bincode(#[from] bincode::Error),
287+
#[error("postcard serialization error: {0}")]
288+
Postcard(#[from] postcard::Error),
289289
#[error("unknown language code '{0}'; expected one of: ja zh ko en ru ar th hi he el")]
290290
UnknownLanguageCode(String),
291291
}

0 commit comments

Comments
 (0)