Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
37a43a1
Add image detection
rafkamicheldaou Nov 12, 2025
696a225
Extend to server, router integ and classification
NPranitha Nov 13, 2025
3d35bbe
Added image generation categories
rafkamicheldaou Nov 15, 2025
8f2410a
Added code to detect image generation- pending routing
rafkamicheldaou Nov 15, 2025
6f3c346
Connect image model
NPranitha Nov 17, 2025
a3717ba
Add fine-tuned BERT model
NPranitha Nov 18, 2025
6e76508
Add gemini integration
NPranitha Nov 22, 2025
4245f49
Fix fine-tuned model loading
NPranitha Nov 24, 2025
68ff84e
Test scripts to validate integrating model end-to-end
rafkamicheldaou Nov 24, 2025
8ffc658
Added envoy for DNS resolution to launch on collab
rafkamicheldaou Nov 29, 2025
fb35a20
Added fine tune model to make downloads file
rafkamicheldaou Nov 29, 2025
857116b
Added unit tests
rafkamicheldaou Nov 29, 2025
b2e161a
rust vit
NPranitha Dec 14, 2025
60b5d57
ffi for vit
NPranitha Dec 14, 2025
ba4e87e
vit test
NPranitha Dec 14, 2025
6170a75
go bindings
NPranitha Dec 14, 2025
842725e
multimodal support
NPranitha Dec 14, 2025
a18a3ba
multimodal test
NPranitha Dec 14, 2025
51e1a79
config changes
NPranitha Dec 14, 2025
ec012da
feat: Add multimodal vision transformer support
NPranitha Dec 14, 2025
6db43e5
Resolve conflicts and integrate multimodal support
NPranitha Dec 14, 2025
ea59deb
Documentation
NPranitha Dec 14, 2025
e6f2f52
chore: update performance baselines (nightly run)
actions-user Dec 15, 2025
34ec770
chore: update performance baselines (nightly run)
actions-user Dec 16, 2025
5706e60
chore: update performance baselines (nightly run)
actions-user Dec 17, 2025
b75c9d8
chore: update performance baselines (nightly run)
actions-user Dec 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
560 changes: 559 additions & 1 deletion candle-binding/Cargo.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions candle-binding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ rayon = "1.8"
parking_lot = "0.12"
crossbeam-channel = "0.5" # Efficient multi-channel select for scheduler wakeup

# Image processing for vision transformer
image = "0.25" # For JPEG/PNG decoding
resize = "0.8" # For image resizing

[dev-dependencies]
rstest = "0.18"
tokio = { version = "1.0", features = ["full"] }
Expand Down
62 changes: 62 additions & 0 deletions candle-binding/semantic-router.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ typedef struct {
float processing_time_ms; // Processing time in milliseconds
} EmbeddingResult;

// Vision transformer functions
// init_vision_encoder_ffi: takes a model identifier and a device type string;
// the Go wrapper treats a false return as an initialization failure.
extern bool init_vision_encoder_ffi(const char* model_id, const char* device_type);
// get_image_embedding: takes a pointer to the raw image bytes, their length in
// bytes, and a MIME type string, and returns a pointer to an EmbeddingResult.
extern EmbeddingResult* get_image_embedding(const unsigned char* image_data, size_t data_len, const char* mime_type);
// free_image_embedding_result: the Go wrapper passes it the pointer returned by
// get_image_embedding once the embedding has been copied out.
extern void free_image_embedding_result(EmbeddingResult* result);

// Embedding similarity result structure
typedef struct {
float similarity; // Cosine similarity score (-1.0 to 1.0)
Expand Down Expand Up @@ -3007,3 +3012,60 @@ func extractLabelAndCategories(content string) (string, []string) {
// ================================================================================================
// END OF LORA UNIFIED CLASSIFIER GO BINDINGS
// ================================================================================================

// ================================================================================================
// VISION TRANSFORMER GO BINDINGS
// ================================================================================================

// InitVisionEncoder initializes the CLIP vision transformer encoder.
//
// modelID and deviceType are copied into temporary C strings, handed to the
// native init_vision_encoder_ffi function, and released when this function
// returns. An error naming modelID is returned when the native call reports
// failure; otherwise the result is nil.
func InitVisionEncoder(modelID, deviceType string) error {
	nativeModel := C.CString(modelID)
	defer C.free(unsafe.Pointer(nativeModel))

	nativeDevice := C.CString(deviceType)
	defer C.free(unsafe.Pointer(nativeDevice))

	// The native side only reports success or failure, so the error can carry
	// nothing more specific than the model that was requested.
	if ok := C.init_vision_encoder_ffi(nativeModel, nativeDevice); !ok {
		return fmt.Errorf("failed to initialize vision encoder with model: %s", modelID)
	}
	return nil
}

// GetImageEmbedding extracts an embedding vector from an image.
//
// imageData holds the raw encoded image bytes and must not be empty; mimeType
// is forwarded to the native layer unchanged. The native result is copied into
// a freshly allocated Go slice and then released, so the returned slice does
// not alias any C memory.
//
// An error is returned when imageData is empty, when the native call returns
// no result, when the result has its error flag set, or when the result
// describes an unusable buffer (negative length, or a nil data pointer with a
// positive length).
func GetImageEmbedding(imageData []byte, mimeType string) ([]float32, error) {
	if len(imageData) == 0 {
		return nil, fmt.Errorf("image data cannot be empty")
	}

	cMimeType := C.CString(mimeType)
	defer C.free(unsafe.Pointer(cMimeType))

	result := C.get_image_embedding(
		(*C.uchar)(unsafe.Pointer(&imageData[0])),
		C.size_t(len(imageData)),
		cMimeType,
	)
	// Guard before touching any field: dereferencing a nil result would crash
	// the whole process instead of surfacing an error to the caller.
	if result == nil {
		return nil, fmt.Errorf("image embedding extraction failed: native call returned no result")
	}
	defer C.free_image_embedding_result(result)

	if bool(result.error) {
		return nil, fmt.Errorf("image embedding extraction failed")
	}

	length := int(result.length)
	// A negative length would make `make` panic, and a nil buffer with a
	// positive length would be read through a nil pointer below.
	if length < 0 {
		return nil, fmt.Errorf("image embedding extraction failed: invalid embedding length %d", length)
	}
	if length > 0 && result.data == nil {
		return nil, fmt.Errorf("image embedding extraction failed: missing embedding data")
	}

	embedding := make([]float32, length)
	if length > 0 {
		// View the C buffer as a Go slice capped at `length` elements and copy
		// it out before the deferred free releases the native memory.
		cFloats := (*[1 << 30]C.float)(unsafe.Pointer(result.data))[:length:length]
		for i, v := range cFloats {
			embedding[i] = float32(v)
		}
	}

	return embedding, nil
}

// ================================================================================================
// END OF VISION TRANSFORMER GO BINDINGS
// ================================================================================================
25 changes: 17 additions & 8 deletions candle-binding/src/ffi/classify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use std::ffi::{c_char, CStr};
use std::sync::{Arc, OnceLock};

use crate::ffi::init::{
LORA_INTENT_CLASSIFIER, LORA_JAILBREAK_CLASSIFIER, PARALLEL_LORA_ENGINE, UNIFIED_CLASSIFIER,
LORA_INTENT_CLASSIFIER, LORA_JAILBREAK_CLASSIFIER, PARALLEL_LORA_ENGINE, UNIFIED_CLASSIFIER, BERT_CLASSIFIER, BERT_PII_CLASSIFIER, BERT_JAILBREAK_CLASSIFIER
};
// Import DeBERTa classifier for jailbreak detection
use super::init::DEBERTA_JAILBREAK_CLASSIFIER;
Expand Down Expand Up @@ -56,9 +56,6 @@ pub fn load_id2label_from_config(

// Legacy classifiers for backward compatibility using OnceLock pattern
// These are kept for old API paths but new code should use the dual-path architecture
static BERT_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
static BERT_PII_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
static BERT_JAILBREAK_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();

/// Classify text using basic classifier
///
Expand All @@ -82,10 +79,22 @@ pub extern "C" fn classify_text(text: *const c_char) -> ClassificationResult {
if let Some(classifier) = BERT_CLASSIFIER.get() {
let classifier = classifier.clone(); // Cheap Arc clone for concurrent access
match classifier.classify_text(text) {
Ok((class_idx, confidence)) => ClassificationResult {
predicted_class: class_idx as i32,
confidence,
label: std::ptr::null_mut(),
Ok((class_idx, confidence)) => {
// Validate the result
let num_classes = classifier.num_classes();
if class_idx >= num_classes {
eprintln!("Error: Invalid class index {} (expected < {})", class_idx, num_classes);
default_result
} else if !confidence.is_finite() || confidence < 0.0 || confidence > 1.0 {
eprintln!("Error: Invalid confidence value {} (expected 0.0-1.0)", confidence);
default_result
} else {
ClassificationResult {
predicted_class: class_idx as i32,
confidence,
label: std::ptr::null_mut(),
}
}
},
Err(e) => {
eprintln!("Error classifying text: {e}");
Expand Down
6 changes: 3 additions & 3 deletions candle-binding/src/ffi/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ use crate::BertClassifier;
// - Thread-safe initialization guarantee
// - No dependency on lazy_static
pub static BERT_SIMILARITY: OnceLock<Arc<BertSimilarity>> = OnceLock::new();
static BERT_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
static BERT_PII_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
static BERT_JAILBREAK_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
pub static BERT_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
pub static BERT_PII_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
pub static BERT_JAILBREAK_CLASSIFIER: OnceLock<Arc<BertClassifier>> = OnceLock::new();
// DeBERTa v3 jailbreak/prompt injection classifier (exported for use in classify.rs)
pub static DEBERTA_JAILBREAK_CLASSIFIER: OnceLock<
Arc<crate::model_architectures::traditional::deberta_v3::DebertaV3Classifier>,
Expand Down
4 changes: 4 additions & 0 deletions candle-binding/src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub mod similarity; // similarity functions
pub mod tokenization; // tokenization function
pub mod types; // C structure definitions
pub mod validation; // parameter validation functions
pub mod vision; // vision transformer functions

pub mod memory_safety; // Dual-path memory safety system
pub mod state_manager; // Global state management system
Expand All @@ -27,6 +28,7 @@ pub use similarity::*;
pub use tokenization::*;
pub use types::*;
pub use validation::*;
pub use vision::*;

pub use memory_safety::*;
pub use state_manager::*;
Expand All @@ -45,3 +47,5 @@ pub mod oncelock_concurrent_test;
pub mod state_manager_test;
#[cfg(test)]
pub mod validation_test;
#[cfg(test)]
pub mod vision_test;
36 changes: 35 additions & 1 deletion candle-binding/src/ffi/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ pub struct ClassificationResult {
#[repr(C)]
#[derive(Debug)]
pub struct ClassificationResultWithProbs {
pub confidence: f32,
pub predicted_class: i32,
pub confidence: f32,
pub label: *mut c_char,
pub probabilities: *mut f32,
pub num_classes: i32,
Expand All @@ -39,6 +39,40 @@ pub struct EmbeddingResult {
pub processing_time_ms: f32,
}

impl EmbeddingResult {
    /// Create a heap-allocated success result that owns `embedding`.
    ///
    /// The vector is converted into a boxed slice and leaked, so `data` points
    /// at exactly `length` `f32` values. Both the returned struct and the
    /// buffer behind `data` are handed to the caller as raw pointers and are
    /// not freed by this function.
    /// NOTE(review): presumably released through the matching FFI free
    /// function — confirm against the vision module.
    ///
    /// If the embedding has more elements than an `i32` can represent, the
    /// vector is dropped and an error result is returned instead of storing a
    /// truncated length.
    pub fn success(embedding: Vec<f32>) -> *mut Self {
        // The C-facing struct stores the length as i32; a plain `as` cast
        // would silently wrap and leave `length` out of sync with the buffer.
        let length = match i32::try_from(embedding.len()) {
            Ok(length) => length,
            Err(_) => return Self::error("embedding length does not fit in i32"),
        };

        // Transfer ownership of the buffer out of Rust's drop tracking; the
        // slice pointer is narrowed to a pointer to its first element.
        let data = Box::into_raw(embedding.into_boxed_slice()).cast::<f32>();

        Box::into_raw(Box::new(Self {
            data,
            length,
            error: false,
            model_type: -1, // Vision model
            sequence_length: 0,
            processing_time_ms: 0.0,
        }))
    }

    /// Create a heap-allocated error result.
    ///
    /// The result carries a null `data` pointer, a zero `length`, and
    /// `error == true`. `_message` is currently unused: the struct has no
    /// field to carry it, so only the error flag is reported.
    pub fn error(_message: &str) -> *mut Self {
        // For now, just return error flag
        Box::into_raw(Box::new(Self {
            data: std::ptr::null_mut(),
            length: 0,
            error: true,
            model_type: -1,
            sequence_length: 0,
            processing_time_ms: 0.0,
        }))
    }
}

/// Tokenization result structure (matches Go C struct)
#[repr(C)]
#[derive(Debug)]
Expand Down
Loading