@@ -1482,34 +1482,33 @@ func TestGetEmbeddingSmart(t *testing.T) {
14821482 }
14831483
14841484 t .Run ("ShortTextHighLatency" , func (t * testing.T ) {
1485- // Short text with high latency priority - uses Qwen3 (1024) since Gemma is not available
1485+ // Short text with high latency priority should use Gemma (768)
14861486 text := "Hello world"
14871487 embedding , err := GetEmbeddingSmart (text , 0.3 , 0.8 )
14881488
14891489 if err != nil {
14901490 t .Fatalf ("GetEmbeddingSmart failed: %v" , err )
14911491 }
14921492
1493- // Expect Qwen3 (1024) dimension since Gemma is not available
1494- if len (embedding ) != 1024 {
1495- t .Errorf ("Expected 1024-dim embedding, got %d" , len (embedding ))
1493+ if len (embedding ) != 768 {
1494+ t .Errorf ("Expected 768-dim embedding, got %d" , len (embedding ))
14961495 }
14971496
14981497 t .Logf ("Short text embedding generated: dim=%d" , len (embedding ))
14991498 })
15001499
15011500 t .Run ("MediumTextBalanced" , func (t * testing.T ) {
1502- // Medium text with balanced priorities - uses Qwen3 (1024) since Gemma is not available
1501+ // Medium text with balanced priorities - may select Qwen3 (1024) or Gemma (768)
15031502 text := strings .Repeat ("This is a medium length text with enough words to exceed 512 tokens. " , 10 )
15041503 embedding , err := GetEmbeddingSmart (text , 0.5 , 0.5 )
15051504
15061505 if err != nil {
15071506 t .Fatalf ("GetEmbeddingSmart failed: %v" , err )
15081507 }
15091508
1510- // Expect Qwen3 (1024) dimension since Gemma is not available
1511- if len (embedding ) != 1024 {
1512- t .Errorf ("Expected 1024-dim embedding, got %d" , len (embedding ))
1509+ // Accept both Qwen3 (1024) and Gemma (768) dimensions
1510+ if len (embedding ) != 768 && len ( embedding ) != 1024 {
1511+ t .Errorf ("Expected 768 or 1024-dim embedding, got %d" , len (embedding ))
15131512 }
15141513
15151514 t .Logf ("Medium text embedding generated: dim=%d" , len (embedding ))
@@ -1569,9 +1568,9 @@ func TestGetEmbeddingSmart(t *testing.T) {
15691568 return
15701569 }
15711570
1572- // Expect Qwen3 (1024) since Gemma is not available
1573- if len (embedding ) != 1024 {
1574- t .Errorf ("Expected 1024-dim embedding, got %d" , len (embedding ))
1571+ // Smart routing may select Qwen3 (1024) or Gemma (768) based on priorities
1572+ if len (embedding ) != 768 && len ( embedding ) != 1024 {
1573+ t .Errorf ("Expected 768 or 1024-dim embedding, got %d" , len (embedding ))
15751574 }
15761575 t .Logf ("Priority test %s: generated %d-dim embedding" , tc .desc , len (embedding ))
15771576 })
@@ -1594,9 +1593,9 @@ func TestGetEmbeddingSmart(t *testing.T) {
15941593 continue
15951594 }
15961595
1597- // Expect Qwen3 (1024) since Gemma is not available
1598- if len (embedding ) != 1024 {
1599- t .Errorf ("Iteration %d: Expected 1024-dim embedding, got %d" , i , len (embedding ))
1596+ // Smart routing may select Qwen3 (1024) or Gemma (768)
1597+ if len (embedding ) != 768 && len ( embedding ) != 1024 {
1598+ t .Errorf ("Iteration %d: Expected 768 or 1024-dim embedding, got %d" , i , len (embedding ))
16001599 }
16011600
16021601 // Verify no nil pointers
@@ -1635,12 +1634,11 @@ func BenchmarkGetEmbeddingSmart(b *testing.B) {
16351634}
16361635
16371636// Test constants for embedding models (Phase 4.2)
1638- // Note: Gemma model is gated and requires HF_TOKEN, so tests use Qwen3 only
16391637const (
16401638 Qwen3EmbeddingModelPath = "../models/Qwen3-Embedding-0.6B"
1641- GemmaEmbeddingModelPath = "" // Gemma is gated, not used in CI tests
1639+ GemmaEmbeddingModelPath = "../models/embeddinggemma-300m"
16421640 TestEmbeddingText = "This is a test sentence for embedding generation"
1643- TestLongContextText = "This is a longer text that might benefit from long-context embedding models like Qwen3"
1641+ TestLongContextText = "This is a longer text that might benefit from long-context embedding models like Qwen3 or Gemma "
16441642)
16451643
16461644// Test constants for Qwen3 Multi-LoRA
@@ -1702,8 +1700,22 @@ func TestInitEmbeddingModels(t *testing.T) {
17021700 })
17031701
17041702 t .Run ("InitGemmaOnly" , func (t * testing.T ) {
1705- // Gemma is a gated model requiring HF_TOKEN, skip in CI
1706- t .Skip ("Skipping Gemma-only test: Gemma is a gated model requiring HF_TOKEN" )
1703+ err := InitEmbeddingModels ("" , GemmaEmbeddingModelPath , true ) // only the Gemma path is supplied; the first path is left empty
1704+ if err != nil {
1705+ t .Logf ("InitEmbeddingModels (Gemma only) returned error (may already be initialized): %v" , err )
1706+
1707+ // Init reported an error: probe GetEmbeddingSmart to tell "already initialized" apart from a real failure
1708+ _ , testErr := GetEmbeddingSmart ("test" , 0.5 , 0.5 )
1709+ if testErr == nil {
1710+ t .Log ("✓ ModelFactory is functional (already initialized)" )
1711+ } else if isModelInitializationError (testErr ) {
1712+ t .Skipf ("Skipping test due to model unavailability: %v" , testErr )
1713+ } else {
1714+ t .Errorf ("Gemma-only init failed and embedding generation is not functional: %v" , testErr ) // any other error must fail the subtest instead of passing silently
1715+ }
1716+ } else {
1717+ t .Log ("✓ Gemma model initialized successfully" )
1718+ }
17071719 })
17081720
17091721 t .Run ("InitWithInvalidPaths" , func (t * testing.T ) {
@@ -1785,16 +1797,16 @@ func TestGetEmbeddingWithDim(t *testing.T) {
17851797
17861798 t .Run ("OversizedDimension" , func (t * testing.T ) {
17871799 // Test graceful degradation when requested dimension exceeds model capacity
1788- // Qwen3: 1024, so 2048 should fall back to full dimension
1800+ // Qwen3 outputs at most 1024 dims and Gemma at most 768, so a 2048 request should fall back to the selected model's full dimension
17891801 embedding , err := GetEmbeddingWithDim (TestEmbeddingText , 0.5 , 0.5 , 2048 )
17901802 if err != nil {
17911803 t .Errorf ("Should gracefully handle oversized dimension, got error: %v" , err )
17921804 return
17931805 }
17941806
1795- // Should return full dimension (1024 for Qwen3)
1796- if len (embedding ) != 1024 {
1797- t .Errorf ("Expected full dimension (1024), got %d" , len (embedding ))
1807+ // Should return full dimension (1024 for Qwen3 or 768 for Gemma)
1808+ if len (embedding ) != 1024 && len ( embedding ) != 768 {
1809+ t .Errorf ("Expected full dimension (1024 or 768 ), got %d" , len (embedding ))
17981810 } else {
17991811 t .Logf ("✓ Oversized dimension gracefully degraded to full dimension: %d" , len (embedding ))
18001812 }
@@ -1889,9 +1901,6 @@ func TestEmbeddingPriorityRouting(t *testing.T) {
18891901 if err != nil {
18901902 t .Fatalf ("Failed to initialize embedding models: %v" , err )
18911903 }
1892-
1893- // Note: These tests use Matryoshka dimension truncation (768) with Qwen3 model
1894- // The dimension is truncated from Qwen3's full 1024 dimensions
18951904 testCases := []struct {
18961905 name string
18971906 text string
@@ -1906,23 +1915,23 @@ func TestEmbeddingPriorityRouting(t *testing.T) {
19061915 qualityPriority : 0.2 ,
19071916 latencyPriority : 0.9 ,
19081917 expectedDim : 768 ,
1909- description : "Uses Qwen3 with Matryoshka 768 truncation " ,
1918+ description : "Should prefer faster embedding model (Gemma > Qwen3) " ,
19101919 },
19111920 {
19121921 name : "HighQualityPriority" ,
19131922 text : strings .Repeat ("Long context text " , 30 ),
19141923 qualityPriority : 0.9 ,
19151924 latencyPriority : 0.2 ,
19161925 expectedDim : 768 ,
1917- description : "Uses Qwen3 with Matryoshka 768 truncation " ,
1926+ description : "Should prefer quality model (Qwen3/Gemma) " ,
19181927 },
19191928 {
19201929 name : "BalancedPriority" ,
19211930 text : "Medium length text for embedding" ,
19221931 qualityPriority : 0.5 ,
19231932 latencyPriority : 0.5 ,
19241933 expectedDim : 768 ,
1925- description : "Uses Qwen3 with Matryoshka 768 truncation " ,
1934+ description : "Should select based on text length " ,
19261935 },
19271936 }
19281937
0 commit comments