feat(embedding): multi-provider abstraction layer (#62)

Siddhant-K-code · ona-agent · web-flow · commit 1c6707122b13 · 2026-05-02T20:00:33.000+05:30
Add embedding.NewProvider factory supporting OpenAI, Ollama, and Cohere
via a unified ProviderConfig. Providers self-register via init() so
callers only need a blank import.

New packages:
  pkg/embedding/ollama  - local Ollama server (/api/embeddings)
  pkg/embedding/cohere  - Cohere API (embed-english-v3.0 default)

New files:
  pkg/embedding/registry.go       - NewProvider, RegisterFactory,
                                    ProviderConfig, SupportedProviders
  pkg/embedding/openai/register.go - registers OpenAI into the factory
  pkg/embedding/ollama/register.go - registers Ollama into the factory
  pkg/embedding/cohere/register.go - registers Cohere into the factory
  pkg/embedding/registry_test.go  - custom provider, unknown type,
                                    ollama resolution, cache wrapping

CacheSize=-1 disables the in-memory cache; 0 uses the default (10k).
Existing cmd/ code continues to use openai.NewClient directly and is
unaffected.

Co-authored-by: Ona &lt;no-reply@ona.com&gt;
diff --git a/README.md b/README.md
@@ -904,6 +904,40 @@ Pattern → annotation mapping:
 - **Code Review** - Blast radius analysis for PRs
 - **Enterprise** - Deterministic outputs with source attribution
 
+## Embedding Providers
+
+Distill supports multiple embedding backends via a unified factory. Import the provider package to register it, then call `embedding.NewProvider`:
+
+```go
+import (
+    "github.com/Siddhant-K-code/distill/pkg/embedding"
+    _ "github.com/Siddhant-K-code/distill/pkg/embedding/openai"  // register OpenAI
+    _ "github.com/Siddhant-K-code/distill/pkg/embedding/ollama"  // register Ollama
+    _ "github.com/Siddhant-K-code/distill/pkg/embedding/cohere"  // register Cohere
+)
+
+provider, err := embedding.NewProvider(embedding.ProviderConfig{
+    Type:      embedding.ProviderOllama,   // "openai" | "ollama" | "cohere"
+    BaseURL:   "http://localhost:11434",   // optional override
+    Model:     "nomic-embed-text",         // optional override
+    CacheSize: 10000,                      // 0 = default (10k), -1 = disabled
+})
+```
+
+| Provider | Type string | Default model | Notes |
+|----------|-------------|---------------|-------|
+| OpenAI | `openai` | `text-embedding-3-small` | Requires `OPENAI_API_KEY` |
+| Ollama | `ollama` | `nomic-embed-text` | Local server, no API key |
+| Cohere | `cohere` | `embed-english-v3.0` | Requires `COHERE_API_KEY` |
+
+Custom providers can be registered at startup:
+
+```go
+embedding.RegisterFactory("my-provider", func(cfg embedding.ProviderConfig) (embedding.Provider, error) {
+    return myProvider{apiKey: cfg.APIKey}, nil
+})
+```
+
 ## Roadmap
 
 Distill is evolving from a dedup utility into a context intelligence layer. Here's what's next:
@@ -922,6 +956,7 @@ Distill is evolving from a dedup utility into a context intelligence layer. Here
 | **Prefix stability validator** | [#48](https://github.com/Siddhant-K-code/distill/issues/48) | Shipped | `StabilityValidator` tracks prefix hashes across requests and detects dynamic content (timestamps, request IDs, UUIDs) bleeding into cached prefixes. |
 | **Per-call-site hit rate tracking** | [#47](https://github.com/Siddhant-K-code/distill/issues/47) | Shipped | `CallSiteTracker` records Anthropic cache usage per call site; `AllStats()` returns worst performers first. |
 | **TTL-aware cache tracker** | [#49](https://github.com/Siddhant-K-code/distill/issues/49) | Shipped | `TTLTracker` monitors Anthropic's 5-minute cache TTL per prefix hash. `ScheduleDeadline` tells batch jobs the latest safe time to send the next request. |
+| **Multi-provider embedding abstraction** | [#33](https://github.com/Siddhant-K-code/distill/issues/33) | Shipped | `embedding.NewProvider` factory supports OpenAI, Ollama, and Cohere via a unified `ProviderConfig`. Custom providers register via `RegisterFactory`. |
 
 ### Code Intelligence
 
diff --git a/pkg/embedding/cohere/client.go b/pkg/embedding/cohere/client.go
@@ -0,0 +1,159 @@
+// Package cohere provides an embedding.Provider backed by the Cohere API.
+package cohere
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/Siddhant-K-code/distill/pkg/embedding"
+)
+
+const (
+	defaultBaseURL = "https://api.cohere.ai/v1"
+	defaultModel   = "embed-english-v3.0"
+	defaultTimeout = 30 * time.Second
+)
+
+// InputType controls how Cohere classifies the input for retrieval tasks.
+type InputType string
+
+const (
+	InputTypeSearchDocument InputType = "search_document"
+	InputTypeSearchQuery    InputType = "search_query"
+	InputTypeClassification InputType = "classification"
+	InputTypeClustering     InputType = "clustering"
+)
+
+// Model dimensions for common Cohere embedding models.
+var modelDimensions = map[string]int{
+	"embed-english-v3.0":       1024,
+	"embed-multilingual-v3.0":  1024,
+	"embed-english-light-v3.0": 384,
+}
+
+// Config holds Cohere client configuration.
+type Config struct {
+	// APIKey is the Cohere API key (required).
+	APIKey string
+
+	// Model is the embedding model. Default: embed-english-v3.0
+	Model string
+
+	// InputType controls retrieval optimisation. Default: search_document
+	InputType InputType
+
+	// Timeout for API requests. Default: 30s
+	Timeout time.Duration
+}
+
+// Client implements embedding.Provider for Cohere.
+type Client struct {
+	cfg        Config
+	httpClient *http.Client
+	dimension  int
+}
+
+// NewClient creates a new Cohere embedding client.
+func NewClient(cfg Config) (*Client, error) {
+	if cfg.APIKey == "" {
+		return nil, fmt.Errorf("Cohere API key is required")
+	}
+	if cfg.Model == "" {
+		cfg.Model = defaultModel
+	}
+	if cfg.InputType == "" {
+		cfg.InputType = InputTypeSearchDocument
+	}
+	if cfg.Timeout <= 0 {
+		cfg.Timeout = defaultTimeout
+	}
+	dim := modelDimensions[cfg.Model]
+	return &Client{
+		cfg:        cfg,
+		httpClient: &http.Client{Timeout: cfg.Timeout},
+		dimension:  dim,
+	}, nil
+}
+
+type embedRequest struct {
+	Texts     []string  `json:"texts"`
+	Model     string    `json:"model"`
+	InputType InputType `json:"input_type"`
+}
+
+type embedResponse struct {
+	Embeddings [][]float32 `json:"embeddings"`
+}
+
+// Embed returns the embedding for a single text.
+func (c *Client) Embed(ctx context.Context, text string) ([]float32, error) {
+	if text == "" {
+		return nil, embedding.ErrEmptyInput
+	}
+	results, err := c.EmbedBatch(ctx, []string{text})
+	if err != nil {
+		return nil, err
+	}
+	return results[0], nil
+}
+
+// EmbedBatch embeds multiple texts in a single API call.
+func (c *Client) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
+	if len(texts) == 0 {
+		return nil, nil
+	}
+
+	body, err := json.Marshal(embedRequest{
+		Texts:     texts,
+		Model:     c.cfg.Model,
+		InputType: c.cfg.InputType,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
+		defaultBaseURL+"/embed", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("build request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+c.cfg.APIKey)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("cohere request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusTooManyRequests {
+		return nil, embedding.ErrRateLimited
+	}
+	if resp.StatusCode == http.StatusUnauthorized {
+		return nil, embedding.ErrInvalidAPIKey
+	}
+	if resp.StatusCode != http.StatusOK {
+		b, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("cohere %d: %s", resp.StatusCode, string(b))
+	}
+
+	var result embedResponse
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+	if len(result.Embeddings) != len(texts) {
+		return nil, fmt.Errorf("expected %d embeddings, got %d", len(texts), len(result.Embeddings))
+	}
+	return result.Embeddings, nil
+}
+
+// Dimension returns the embedding dimension for the configured model.
+func (c *Client) Dimension() int { return c.dimension }
+
+// ModelName returns the configured model name.
+func (c *Client) ModelName() string { return c.cfg.Model }
diff --git a/pkg/embedding/cohere/register.go b/pkg/embedding/cohere/register.go
@@ -0,0 +1,14 @@
+package cohere
+
+import (
+	"github.com/Siddhant-K-code/distill/pkg/embedding"
+)
+
+func init() {
+	embedding.RegisterFactory(embedding.ProviderCohere, func(cfg embedding.ProviderConfig) (embedding.Provider, error) {
+		return NewClient(Config{
+			APIKey: cfg.APIKey,
+			Model:  cfg.Model,
+		})
+	})
+}
diff --git a/pkg/embedding/ollama/client.go b/pkg/embedding/ollama/client.go
@@ -0,0 +1,123 @@
+// Package ollama provides an embedding.Provider backed by a local Ollama server.
+package ollama
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/Siddhant-K-code/distill/pkg/embedding"
+)
+
+const (
+	defaultBaseURL = "http://localhost:11434"
+	defaultModel   = "nomic-embed-text"
+	defaultTimeout = 60 * time.Second
+)
+
+// Config holds Ollama client configuration.
+type Config struct {
+	// BaseURL is the Ollama server URL. Default: http://localhost:11434
+	BaseURL string
+
+	// Model is the embedding model to use. Default: nomic-embed-text
+	Model string
+
+	// Timeout for API requests. Default: 60s (local models can be slow).
+	Timeout time.Duration
+}
+
+// Client implements embedding.Provider for Ollama.
+type Client struct {
+	cfg        Config
+	httpClient *http.Client
+}
+
+// NewClient creates a new Ollama embedding client.
+func NewClient(cfg Config) *Client {
+	if cfg.BaseURL == "" {
+		cfg.BaseURL = defaultBaseURL
+	}
+	if cfg.Model == "" {
+		cfg.Model = defaultModel
+	}
+	if cfg.Timeout <= 0 {
+		cfg.Timeout = defaultTimeout
+	}
+	return &Client{
+		cfg:        cfg,
+		httpClient: &http.Client{Timeout: cfg.Timeout},
+	}
+}
+
+type embedRequest struct {
+	Model  string `json:"model"`
+	Prompt string `json:"prompt"`
+}
+
+type embedResponse struct {
+	Embedding []float32 `json:"embedding"`
+}
+
+// Embed returns the embedding for a single text.
+func (c *Client) Embed(ctx context.Context, text string) ([]float32, error) {
+	if text == "" {
+		return nil, embedding.ErrEmptyInput
+	}
+
+	body, err := json.Marshal(embedRequest{Model: c.cfg.Model, Prompt: text})
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
+		c.cfg.BaseURL+"/api/embeddings", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("build request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("ollama request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		b, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("ollama %d: %s", resp.StatusCode, string(b))
+	}
+
+	var result embedResponse
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+	if len(result.Embedding) == 0 {
+		return nil, fmt.Errorf("ollama returned empty embedding")
+	}
+	return result.Embedding, nil
+}
+
+// EmbedBatch embeds multiple texts sequentially (Ollama has no batch API).
+func (c *Client) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error) {
+	results := make([][]float32, len(texts))
+	for i, text := range texts {
+		emb, err := c.Embed(ctx, text)
+		if err != nil {
+			return nil, fmt.Errorf("embed[%d]: %w", i, err)
+		}
+		results[i] = emb
+	}
+	return results, nil
+}
+
+// Dimension returns the embedding dimension. Ollama models vary; we return
+// 0 to indicate it is determined at runtime from the first response.
+func (c *Client) Dimension() int { return 0 }
+
+// ModelName returns the configured model name.
+func (c *Client) ModelName() string { return c.cfg.Model }
diff --git a/pkg/embedding/ollama/register.go b/pkg/embedding/ollama/register.go
@@ -0,0 +1,17 @@
+package ollama
+
+import (
+	"time"
+
+	"github.com/Siddhant-K-code/distill/pkg/embedding"
+)
+
+func init() {
+	embedding.RegisterFactory(embedding.ProviderOllama, func(cfg embedding.ProviderConfig) (embedding.Provider, error) {
+		return NewClient(Config{
+			BaseURL: cfg.BaseURL,
+			Model:   cfg.Model,
+			Timeout: time.Duration(0), // uses defaultTimeout
+		}), nil
+	})
+}
diff --git a/pkg/embedding/openai/register.go b/pkg/embedding/openai/register.go
@@ -0,0 +1,15 @@
+package openai
+
+import (
+	"github.com/Siddhant-K-code/distill/pkg/embedding"
+)
+
+func init() {
+	embedding.RegisterFactory(embedding.ProviderOpenAI, func(cfg embedding.ProviderConfig) (embedding.Provider, error) {
+		return NewClient(Config{
+			APIKey:  cfg.APIKey,
+			Model:   cfg.Model,
+			BaseURL: cfg.BaseURL,
+		})
+	})
+}
diff --git a/pkg/embedding/registry.go b/pkg/embedding/registry.go
diff --git a/pkg/embedding/registry_test.go b/pkg/embedding/registry_test.go