Skip to content

Commit f6d92b0

Browse files
committed
feat: Add RLM embedder, tokenizer, eval gates, trace writer, and security hardening
New modules (4 files, 2,359 lines):
- rlm_embedder.rs (743L): RLM-style recursive sentence transformer with 3 variants (query-conditioned, corpus-conditioned, contradiction-aware twin), merge rule, BaseEmbedder/NeighborRetriever traits, 14 tests
- tokenizer.rs (418L): BPE tokenizer with GGUF vocab loading, encode/decode, special token handling, 10 tests
- trace.rs (554L): JSONL trace writer for routing, citation, refusal decisions, Jaccard similarity, manual JSON serialization, 10 tests
- eval.rs (644L): Three behavioral gates (routing correctness >= 0.85, citation precision >= 0.90, refusal F1 >= 0.85), EvalSuite, 12 tests

Documentation:
- AD-24: RLM-Style Recursive Sentence Transformer Embedder — 3 variants, merge rule, training strategy, evaluation criteria, appliance fit
- DDD v2.6: 8 new ubiquitous language terms, 4 new open questions (#31-34)
- 3 new positive consequences (#31-33) for RLM embeddings

Security hardening (across 6 existing files):
- Path traversal validation in GGUF export
- Division-by-zero epsilon guards in quantizer
- Bounds validation on public function inputs
- NaN-safe softmax with -inf handling

138 tests pass, 0 compilation errors. Total bitnet module: 9,632 lines across 16 files.

https://claude.ai/code/session_011nTcGcn49b8YKJRVoh4TaK
1 parent 14ed07e commit f6d92b0

13 files changed

Lines changed: 2614 additions & 21 deletions

crates/ruvllm/src/bitnet/backend.rs

Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -538,6 +538,13 @@ impl BitNetBackend {
538538
// --- Expert forward + weighted sum ---
539539
let mut moe_output = vec![0.0f32; hidden];
540540
for (&eidx, &eweight) in expert_indices.iter().zip(expert_weights.iter()) {
541+
if eidx >= layer.experts.len() {
542+
return Err(RuvLLMError::Model(format!(
543+
"Expert index {} out of bounds (layer has {} experts)",
544+
eidx,
545+
layer.experts.len()
546+
)));
547+
}
541548
let expert_out =
542549
self.expert_forward(&normed_ffn, &layer.experts[eidx], config)?;
543550
for (o, &e) in moe_output.iter_mut().zip(expert_out.iter()) {
@@ -573,7 +580,12 @@ impl BitNetBackend {
573580
) -> Result<(Vec<usize>, Vec<f32>)> {
574581
let num_experts = config.num_experts;
575582
let hidden = config.hidden_size;
576-
let top_k = config.active_experts;
583+
// Clamp top_k to num_experts to prevent selecting more experts than exist
584+
let top_k = config.active_experts.min(num_experts);
585+
586+
if num_experts == 0 {
587+
return Ok((vec![], vec![]));
588+
}
577589

578590
// Gate: scores[e] = dot(hidden_states, gate_weight[e])
579591
let mut scores = vec![0.0f32; num_experts];
@@ -926,17 +938,41 @@ fn rms_norm_inplace(x: &mut [f32], weight: &[f32], eps: f32) {
926938
}
927939

928940
/// In-place softmax.
941+
///
942+
/// Guards against NaN propagation: if all inputs are -inf or NaN,
943+
/// the result is a uniform distribution (1/n for each element).
929944
fn softmax_inplace(x: &mut [f32]) {
945+
if x.is_empty() {
946+
return;
947+
}
948+
930949
let max_val = x.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
950+
951+
// Guard: if max_val is -inf or NaN, no valid scores exist.
952+
// Fall back to uniform distribution.
953+
if max_val.is_nan() || max_val.is_infinite() && max_val.is_sign_negative() {
954+
let uniform = 1.0 / x.len() as f32;
955+
for v in x.iter_mut() {
956+
*v = uniform;
957+
}
958+
return;
959+
}
960+
931961
let mut sum_exp = 0.0f32;
932962
for v in x.iter_mut() {
933963
*v = (*v - max_val).exp();
934964
sum_exp += *v;
935965
}
936-
if sum_exp > 0.0 {
966+
// Guard: if sum_exp is zero, NaN, or subnormal, fall back to uniform
967+
if !sum_exp.is_normal() || sum_exp <= 0.0 {
968+
let uniform = 1.0 / x.len() as f32;
937969
for v in x.iter_mut() {
938-
*v /= sum_exp;
970+
*v = uniform;
939971
}
972+
return;
973+
}
974+
for v in x.iter_mut() {
975+
*v /= sum_exp;
940976
}
941977
}
942978

crates/ruvllm/src/bitnet/dequantize.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,11 @@ pub fn compute_dequant_error(original: &[f32], dequantized: &[f32]) -> (f32, f32
116116
"Arrays must have same length"
117117
);
118118

119+
// Guard against empty inputs to avoid division by zero
120+
if original.is_empty() {
121+
return (0.0, 0.0, 0.0);
122+
}
123+
119124
let mut sum_abs_error = 0.0f32;
120125
let mut sum_sq_error = 0.0f32;
121126
let mut max_error = 0.0f32;

0 commit comments

Comments
 (0)