Skip to content

Commit 69d9947

Browse files
authored
Inline multimodal (#2348)
1 parent 92e700c commit 69d9947

File tree

15 files changed

+345
-54
lines changed

15 files changed

+345
-54
lines changed

education-ai-suite/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ monitoring/executionlogs/
2222
chroma_data/
2323
minio_data/
2424
extracted_images/
25+
logs/

education-ai-suite/smart-classroom/content_search/docs/dev_guide/file_ingest_and_retrieve/installation.md

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,13 @@ For full develop guide and API Reference, please see the [Dev Guide](docs/dev_gu
77

88
### Prerequisites
99

10-
- **Python 3.10** — only this version is verified on Windows: https://www.python.org/downloads/
11-
- **Rust compiler** — required by some dependencies: https://rust-lang.org/tools/install
12-
- **`multimodal_embedding_serving` wheel** — obtain from [this guide](https://github.com/open-edge-platform/edge-ai-libraries/blob/main/microservices/multimodal-embedding-serving/docs/user-guide/wheel-installation.md) (use verified commit `77b812f`). Place the `.whl` file in the `content_search/` folder before running `install.ps1`.
10+
- **Python 3.12** — verified on Windows: https://www.python.org/downloads/
1311

1412
### Install System Dependencies
1513

1614
The `install.ps1` will:
17-
- Creates the Python 3.10 venv
18-
- Installs `mobileclip`, `salesforce-lavis`, `requirements.txt`, and the `multimodal_embedding_serving` wheel
15+
- Creates the Python 3.12 venv
16+
- Installs `requirements_providers.txt`
1917
- Downloads and installs Tesseract OCR 5.5.0 and adds it to the user PATH
2018
- Downloads and extracts Poppler 25.12.0 and adds it to the user PATH
2119
- Installs MinIO into the content_search/providers/minio_wrapper folder

education-ai-suite/smart-classroom/content_search/install.ps1

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ $venvPython = Join-Path $PSScriptRoot "venv_content_search\Scripts\python.exe"
1616

1717
# --- Create venv ---
1818
if (-not (Test-Path $venvPython)) {
19-
Write-Host "Creating venv (Python 3.10 required)..."
20-
py -3.10 -m venv $venvDir
19+
Write-Host "Creating venv (Python 3.12 required)..."
20+
py -3.12 -m venv $venvDir
2121
} else {
2222
Write-Host "Venv already exists, skipping creation."
2323
}
@@ -26,26 +26,8 @@ if (-not (Test-Path $venvPython)) {
2626
Write-Host "Upgrading pip..."
2727
Invoke-Cmd $venvPython -m pip install --upgrade pip --quiet
2828

29-
Write-Host "Installing mobileclip..."
30-
Invoke-Cmd $venvPython -m pip install git+https://github.com/apple/ml-mobileclip.git@c16bfe5a4feb424762d6bdf5245539120a4ce9ef#egg=mobileclip --quiet
31-
32-
Write-Host "Installing salesforce-lavis..."
33-
Invoke-Cmd $venvPython -m pip install salesforce-lavis==1.0.2 --quiet
34-
35-
Write-Host "Installing requirements_310.txt..."
36-
Invoke-Cmd $venvPython -m pip install -r (Join-Path $PSScriptRoot "requirements_310.txt") --quiet
37-
38-
# --- Install multimodal_embedding_serving wheel ---
39-
$whl = Get-ChildItem -Path $PSScriptRoot -Filter "multimodal_embedding_serving*.whl" -ErrorAction SilentlyContinue | Select-Object -First 1
40-
if ($null -eq $whl) {
41-
$whl = Get-ChildItem -Path (Join-Path $PSScriptRoot "..") -Filter "multimodal_embedding_serving*.whl" -ErrorAction SilentlyContinue | Select-Object -First 1
42-
}
43-
if ($whl) {
44-
Write-Host "Installing multimodal_embedding_serving from $($whl.FullName) ..."
45-
Invoke-Cmd $venvPython -m pip install $whl.FullName --no-deps --quiet
46-
} else {
47-
Write-Warning "multimodal_embedding_serving wheel not found. Place multimodal_embedding_serving-0.1.1-py3-none-any.whl in content_search/ and re-run."
48-
}
29+
Write-Host "Installing requirements_providers.txt..."
30+
Invoke-Cmd $venvPython -m pip install -r (Join-Path $PSScriptRoot "requirements_providers.txt") --quiet
4931

5032
# --- Install Tesseract OCR ---
5133
$tesseractExe = "C:\Program Files\Tesseract-OCR\tesseract.exe"

education-ai-suite/smart-classroom/content_search/providers/chromadb_wrapper/chroma_client.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
# Copyright (C) 2026 Intel Corporation
22
# SPDX-License-Identifier: Apache-2.0
33

4+
import logging
45
import chromadb
56
from utils.config_loader import config
67

8+
logger = logging.getLogger(__name__)
9+
710
_chroma_cfg = config.content_search.chromadb
811

912

@@ -17,12 +20,12 @@ def load_collection(self, collection_name: str):
1720
self.collection = self.client.get_or_create_collection(name=collection_name)
1821
return self.collection
1922
except Exception as e:
20-
print(f"Failed to load collection {collection_name}: {e}")
23+
logger.error(f"Failed to load collection '{collection_name}' (is ChromaDB running?): {e}")
2124
return None
2225

2326
def create_collection(self, collection_name: str = "default"):
2427
if self.load_collection(collection_name):
25-
print(f"Collection {collection_name} already exists and is loaded.")
28+
logger.info(f"Collection '{collection_name}' already exists and is loaded.")
2629
return
2730

2831
self.collection = self.client.create_collection(name=collection_name)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Derived from: edge-ai-libraries/microservices/multimodal-embedding-serving/src/
2+
# Original package: multimodal-embedding-serving v0.1.1
3+
# Only CLIP-related functionality retained; OpenVINO export removed.
4+
5+
from .registry import get_model_handler
6+
from .wrapper import EmbeddingModel
7+
from .clip_handler import CLIPHandler
8+
9+
__all__ = ["get_model_handler", "EmbeddingModel", "CLIPHandler"]
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Derived from: edge-ai-libraries/microservices/multimodal-embedding-serving/src/models/base.py
2+
# Original package: multimodal-embedding-serving v0.1.1
3+
# Only CLIP-related functionality retained; OpenVINO export removed.
4+
#
5+
# Copyright (C) 2026 Intel Corporation
6+
# SPDX-License-Identifier: Apache-2.0
7+
8+
from abc import ABC, abstractmethod
9+
from typing import List, Union, Dict, Any
10+
from PIL import Image
11+
import numpy as np
12+
import torch
13+
14+
15+
class BaseEmbeddingModel(ABC):
    """Abstract base class for multimodal embedding models.

    Subclasses must implement ``load_model``, ``encode_text`` and
    ``encode_image``; the remaining methods are optional hooks with
    sensible defaults that concrete handlers may override.
    """

    def __init__(self, model_config: Dict[str, Any]):
        # Keep the raw config around so subclasses and wrappers can read it.
        self.model_config = model_config
        self.model = None
        self.tokenizer = None
        self.preprocess = None
        self.device = model_config.get("device", "cpu")
        # An explicit "modalities" list in the config wins; otherwise
        # advertise the text+image default.
        configured = model_config.get("modalities")
        self.supported_modalities = set(configured) if configured else {"text", "image"}

    @abstractmethod
    def load_model(self) -> None:
        """Load weights/tokenizer/preprocess; must be called before use."""

    @abstractmethod
    def encode_text(self, texts: Union[str, List[str]]) -> torch.Tensor:
        """Return embeddings for one string or a batch of strings."""

    @abstractmethod
    def encode_image(self, images: Union[Image.Image, List[Image.Image], torch.Tensor]) -> torch.Tensor:
        """Return embeddings for one image, a list of images, or a tensor."""

    # ------------------------------------------------------------------
    # Optional capability hooks
    # ------------------------------------------------------------------

    def supports_text(self) -> bool:
        """True when the model advertises the "text" modality."""
        return "text" in self.supported_modalities

    def supports_image(self) -> bool:
        """True when the model advertises the "image" modality."""
        return "image" in self.supported_modalities

    def supports_video(self) -> bool:
        # Per the code: image support implies (frame-level) video support.
        return "video" in self.supported_modalities or self.supports_image()

    def prepare_query(self, text: str) -> str:
        """Query-side rewriting hook; identity by default."""
        return text

    def prepare_documents(self, texts: List[str]) -> List[str]:
        """Document-side rewriting hook; identity by default."""
        return texts

    def get_embedding_dim(self) -> int:
        """Return the embedding dimensionality (512 unless overridden).

        Raises:
            RuntimeError: if ``load_model()`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError("Model not loaded. Call load_model() first.")
        return 512  # Default; subclasses should override
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# Derived from: edge-ai-libraries/microservices/multimodal-embedding-serving/src/models/handlers/clip_handler.py
2+
# Original package: multimodal-embedding-serving v0.1.1
3+
# Only CLIP-related functionality retained; OpenVINO export removed.
4+
#
5+
# Copyright (C) 2026 Intel Corporation
6+
# SPDX-License-Identifier: Apache-2.0
7+
8+
import logging
9+
from typing import List, Union, Dict, Any, Optional
10+
11+
import torch
12+
import torch.nn.functional as F
13+
from PIL import Image
14+
import open_clip
15+
16+
from .base import BaseEmbeddingModel
17+
18+
logger = logging.getLogger(__name__)
19+
20+
21+
class CLIPHandler(BaseEmbeddingModel):
    """Handler for CLIP models using the open_clip library (PyTorch only)."""

    def __init__(self, model_config: Dict[str, Any]):
        super().__init__(model_config)
        self.model_name = model_config["model_name"]
        self.pretrained = model_config["pretrained"]
        # NOTE(review): device is stored but never used to move the model;
        # inference below runs wherever the loaded weights live — confirm.
        self.device = model_config.get("device", "CPU")
        self._embedding_dim: Optional[int] = None

    def load_model(self) -> None:
        """Load weights, preprocessing transforms and tokenizer; raises on failure."""
        try:
            self._embedding_dim = None  # force a fresh probe after (re)load
            logger.info(f"Loading CLIP model: {self.model_name} with pretrained: {self.pretrained}")

            self.model, _, self.preprocess = open_clip.create_model_and_transforms(
                self.model_name,
                pretrained=self.pretrained,
            )
            self.tokenizer = open_clip.get_tokenizer(self.model_name)
            self.model.eval()  # inference mode only
            logger.info(f"CLIP model {self.model_name} loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load CLIP model {self.model_name}: {e}")
            raise

    def encode_text(self, texts: Union[str, List[str]]) -> torch.Tensor:
        """Return L2-normalised text embeddings, one row per input string."""
        batch = [texts] if isinstance(texts, str) else texts
        tokens = self.tokenizer(batch)
        with torch.no_grad():
            features = self.model.encode_text(tokens)
        return F.normalize(features, dim=-1)

    def encode_image(self, images: Union[Image.Image, List[Image.Image], torch.Tensor]) -> torch.Tensor:
        """Return L2-normalised image embeddings.

        Accepts a pre-built batch tensor, a single PIL image, or a list of
        PIL images (each preprocessed, then stacked into a batch).
        """
        if isinstance(images, torch.Tensor):
            batch = images
        elif isinstance(images, Image.Image):
            batch = self.preprocess(images).unsqueeze(0)
        else:  # list of PIL Images
            batch = torch.stack([self.preprocess(im) for im in images])
        with torch.no_grad():
            features = self.model.encode_image(batch)
        return F.normalize(features, dim=-1)

    def get_embedding_dim(self) -> int:
        """Probe (and cache) the embedding dimensionality via a dummy image.

        Raises:
            RuntimeError: when preprocess/model are not initialised yet.
        """
        if self._embedding_dim is not None:
            return self._embedding_dim

        if self.preprocess is None:
            raise RuntimeError("Preprocessing pipeline not initialized. Call load_model() first.")

        side = self._get_preprocess_image_size()
        probe = self.preprocess(Image.new("RGB", (side, side), color=0)).unsqueeze(0)

        if self.model is None:
            raise RuntimeError("Model not loaded. Call load_model() first.")

        # Match the model's device/dtype; fall back for parameter-less models.
        try:
            first_param = next(self.model.parameters())
            device, dtype = first_param.device, first_param.dtype
        except StopIteration:
            device, dtype = torch.device("cpu"), torch.float32

        with torch.no_grad():
            out = self.model.encode_image(probe.to(device=device, dtype=dtype))
        self._embedding_dim = int(out.shape[-1])

        return self._embedding_dim

    def _get_preprocess_image_size(self) -> int:
        """Best-effort read of the input resolution from the preprocess
        pipeline; returns 224 when it cannot be determined."""
        fallback = 224

        if self.preprocess is None:
            return fallback

        pipeline = getattr(self.preprocess, "transforms", None)
        if not pipeline:
            return fallback

        # First transform exposing a usable ``size`` attribute wins.
        for step in pipeline:
            size = getattr(step, "size", None)
            if isinstance(size, (tuple, list)) and len(size) > 0:
                return int(size[0])
            if isinstance(size, int):
                return int(size)

        return fallback
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Derived from: edge-ai-libraries/microservices/multimodal-embedding-serving/src/models/registry.py
2+
# + edge-ai-libraries/microservices/multimodal-embedding-serving/src/models/config.py
3+
# Original package: multimodal-embedding-serving v0.1.1
4+
# Only CLIP model configs retained; other handlers removed.
5+
#
6+
# Copyright (C) 2026 Intel Corporation
7+
# SPDX-License-Identifier: Apache-2.0
8+
9+
import logging
10+
import os
11+
from typing import Dict, Any
12+
13+
from .base import BaseEmbeddingModel
14+
from .clip_handler import CLIPHandler
15+
16+
logger = logging.getLogger(__name__)
17+
18+
# ── CLIP model configurations ────────────────────────────────────────
# Maps a short, user-facing model id to the open_clip architecture name
# and the pretrained weight tag used to fetch it.
CLIP_CONFIGS: Dict[str, Dict[str, Any]] = {
    "clip-vit-b-32": {
        "model_name": "ViT-B-32",
        "pretrained": "laion2b_s34b_b79k",
        "image_size": 224,
    },
    "clip-vit-b-16": {
        "model_name": "ViT-B-16",
        "pretrained": "openai",
        "image_size": 224,
    },
    "clip-vit-l-14": {
        "model_name": "ViT-L-14",
        "pretrained": "datacomp_xl_s13b_b90k",
        "image_size": 224,
    },
    "clip-vit-h-14": {
        "model_name": "ViT-H-14",
        "pretrained": "laion2b_s32b_b79k",
        "image_size": 224,
    },
}


def get_model_handler(
    model_id: str,
    device: str | None = None,
) -> BaseEmbeddingModel:
    """Create a CLIPHandler for the given *model_id*.

    Accepted formats:
        "CLIP/clip-vit-b-16"  (type/name)
        "clip-vit-b-16"       (name only)

    Raises:
        ValueError: when the (prefix-stripped) name is not in CLIP_CONFIGS.
    """
    # Strip an optional "CLIP/" style prefix; everything after the first
    # "/" is the model name, matching str.split("/", 1) semantics.
    _, sep, tail = model_id.partition("/")
    model_name = tail if sep else model_id

    if model_name not in CLIP_CONFIGS:
        raise ValueError(
            f"Model '{model_id}' not found. "
            f"Available: {', '.join(CLIP_CONFIGS)}"
        )

    # Copy so the per-call device override never mutates the shared table.
    config = CLIP_CONFIGS[model_name].copy()
    config["device"] = device or os.getenv("EMBEDDING_DEVICE", "CPU")

    logger.info(f"Creating CLIPHandler for {model_id} with config: {config}")
    return CLIPHandler(config)
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Derived from: edge-ai-libraries/microservices/multimodal-embedding-serving/src/wrapper.py
2+
# Original package: multimodal-embedding-serving v0.1.1
3+
# Only CLIP-related functionality retained; URL/base64/video helpers removed
4+
# (callers use handler.encode_image() directly).
5+
#
6+
# Copyright (C) 2026 Intel Corporation
7+
# SPDX-License-Identifier: Apache-2.0
8+
9+
from typing import List
10+
11+
from .base import BaseEmbeddingModel
12+
13+
14+
class EmbeddingModel:
    """Application-level wrapper around a model handler.

    Exposes an embed_query/embed_documents interface on top of whatever
    BaseEmbeddingModel implementation it is given.
    """

    def __init__(self, model_handler: BaseEmbeddingModel):
        self.handler = model_handler
        # Mirror a few handler attributes for convenient read access.
        self.model_config = model_handler.model_config
        self.device = model_handler.device
        self.supported_modalities = set(model_handler.supported_modalities)

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string; returns one flat vector."""
        query = self.handler.prepare_query(text)
        vectors = self.handler.encode_text([query])
        return vectors[0].tolist()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of document strings; one vector per input."""
        docs = self.handler.prepare_documents(texts)
        return self.handler.encode_text(docs).tolist()

    def get_embedding_length(self) -> int:
        """Dimensionality of the vectors produced by the handler."""
        return self.handler.get_embedding_dim()

0 commit comments

Comments
 (0)