feat: re-enable EmbeddingGemma-300m support

Liav Weiss · Liav Weiss · commit ee7c0c5b50cf · 2025-12-15T22:33:11.000+02:00
Signed-off-by: Liav Weiss <lweiss@lweiss-thinkpadx1carbongen11.raanaii.csb>
diff --git a/.github/workflows/integration-test-docker.yml b/.github/workflows/integration-test-docker.yml
@@ -82,22 +82,14 @@ jobs:
       - name: Download models
         run: |
           echo "Downloading minimal models for CI..."
-          # Authenticate with HuggingFace if token is available
-          # Note: For PRs from forks, HF_TOKEN is not available (GitHub security feature)
-          # The makefile will gracefully skip gated models (e.g., embeddinggemma-300m) if token is missing
-          if [ -n "${HF_TOKEN:-}" ]; then
-            huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
-            export HUGGINGFACE_HUB_TOKEN="$HF_TOKEN"
-          fi
           make download-models
         env:
           CI: true
           CI_MINIMAL_MODELS: true
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_HUB_ENABLE_HF_TRANSFER: 1
           HF_HUB_DISABLE_TELEMETRY: 1
           # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
-          # For PRs from forks, this will be empty and the makefile will gracefully skip gated models
+          # For PRs from forks, this will be empty and the model_manager will gracefully skip gated models
           # The hf CLI uses HUGGINGFACE_HUB_TOKEN, so we set both for compatibility
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/performance-nightly.yml b/.github/workflows/performance-nightly.yml
@@ -70,11 +70,10 @@ jobs:
       - name: Download models (minimal set for nightly)
         env:
           CI_MINIMAL_MODELS: false
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_HUB_ENABLE_HF_TRANSFER: 1
           HF_HUB_DISABLE_TELEMETRY: 1
           # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
-          # For PRs from forks, this will be empty and the makefile will gracefully skip gated models
+          # For PRs from forks, this will be empty and the model_manager will gracefully skip gated models
           # The hf CLI uses HUGGINGFACE_HUB_TOKEN, so we set both for compatibility
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/performance-test.yml b/.github/workflows/performance-test.yml
@@ -79,11 +79,10 @@ jobs:
       - name: Download models (minimal)
         env:
           CI_MINIMAL_MODELS: true
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_HUB_ENABLE_HF_TRANSFER: 1
           HF_HUB_DISABLE_TELEMETRY: 1
           # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
-          # For PRs from forks, this will be empty and the makefile will gracefully skip gated models
+          # For PRs from forks, this will be empty and the model_manager will gracefully skip gated models
           # The hf CLI uses HUGGINGFACE_HUB_TOKEN, so we set both for compatibility
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
@@ -140,11 +140,10 @@ jobs:
       - name: Download models (minimal on PRs)
         env:
           CI_MINIMAL_MODELS: ${{ github.event_name == 'pull_request' }}
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_HUB_ENABLE_HF_TRANSFER: 1
           HF_HUB_DISABLE_TELEMETRY: 1
           # HF_TOKEN is required for downloading gated models (e.g., embeddinggemma-300m)
-          # For PRs from forks, this will be empty and the makefile will gracefully skip gated models
+          # For PRs from forks, this will be empty and the model_manager will gracefully skip gated models
           # The hf CLI uses HUGGINGFACE_HUB_TOKEN, so we set both for compatibility
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HUGGINGFACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/config/model_manager/models.lora.yaml b/config/model_manager/models.lora.yaml
@@ -29,3 +29,7 @@ models:
 
   - id: Qwen3-Embedding-0.6B
     repo_id: Qwen/Qwen3-Embedding-0.6B
+
+  # Gated model - requires HF_TOKEN (skipped gracefully if the token is not available)
+  - id: embeddinggemma-300m
+    repo_id: google/embeddinggemma-300m
diff --git a/config/model_manager/models.minimal.yaml b/config/model_manager/models.minimal.yaml
@@ -10,8 +10,8 @@
 # Equivalent to: make download-models-minimal
 #             or CI_MINIMAL_MODELS=true make download-models
 #
-# Note: This is the minimal set for fast CI runs. Larger models like
-# embeddinggemma-300m are in models.yaml (full set) for local development.
+# Note: This is the minimal set for fast CI runs. Gated models like
+# embeddinggemma-300m are skipped gracefully if HF_TOKEN is not available.
 
 cache_dir: "models"
 verify: "size" # Use size for faster CI runs
@@ -56,6 +56,10 @@ models:
   - id: Qwen3-Embedding-0.6B
     repo_id: Qwen/Qwen3-Embedding-0.6B
 
+  # Gated model - requires HF_TOKEN (skipped gracefully if the token is not available)
+  - id: embeddinggemma-300m
+    repo_id: google/embeddinggemma-300m
+
   # =============================================================================
   # Hallucination Detection - Required for hallucination tests
   # =============================================================================
diff --git a/src/model_manager/__init__.py b/src/model_manager/__init__.py
@@ -24,6 +24,7 @@
     MissingModelError,
     BadChecksumError,
     DownloadError,
+    GatedModelError,
 )
 
 __version__ = "0.1.0"
@@ -41,6 +42,7 @@
     "MissingModelError",
     "BadChecksumError",
     "DownloadError",
+    "GatedModelError",
 ]
 
 
@@ -96,15 +98,25 @@ def ensure_all(self) -> dict[str, str]:
                 continue
 
             logger.info(f"Downloading model '{spec.id}' from {spec.repo_id}...")
-            local_path = download_model(spec, self.config.cache_dir)
-
-            if self.config.verify != "none":
-                logger.info(f"Verifying model '{spec.id}'...")
-                if not verify_model(local_path, self.config.verify):
-                    raise BadChecksumError(f"Verification failed for model '{spec.id}'")
-
-            results[spec.id] = local_path
-            logger.info(f"Model '{spec.id}' ready at {local_path}")
+            try:
+                local_path = download_model(spec, self.config.cache_dir)
+
+                if self.config.verify != "none":
+                    logger.info(f"Verifying model '{spec.id}'...")
+                    if not verify_model(local_path, self.config.verify):
+                        raise BadChecksumError(
+                            f"Verification failed for model '{spec.id}'"
+                        )
+
+                results[spec.id] = local_path
+                logger.info(f"Model '{spec.id}' ready at {local_path}")
+            except GatedModelError as e:
+                # Gracefully skip gated models when token is not available
+                logger.warning(
+                    f"⚠️  Skipping gated model '{spec.id}': {e}. "
+                    "This is expected for PRs from forks where HF_TOKEN is not available."
+                )
+                continue
 
         return results
 
@@ -117,7 +129,14 @@ def ensure_model(self, model_id: str) -> str:
 
         Returns:
             Local path to the model
+
+        Raises:
+            GatedModelError: If the model is gated and HF_TOKEN is not available
         """
+        import logging
+
+        logger = logging.getLogger(__name__)
+
         spec = self.get_model_spec(model_id)
         if spec is None:
             raise MissingModelError(f"Model '{model_id}' not found in configuration")
diff --git a/src/model_manager/downloader.py b/src/model_manager/downloader.py
@@ -11,8 +11,14 @@
 from huggingface_hub import snapshot_download
 from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
 
+try:
+    from huggingface_hub.errors import GatedRepoError
+except ImportError:
+    # Fallback for older versions of huggingface_hub
+    GatedRepoError = None
+
 from .config import ModelSpec
-from .errors import DownloadError, MissingModelError
+from .errors import DownloadError, MissingModelError, GatedModelError
 
 logger = logging.getLogger(__name__)
 
@@ -86,6 +92,22 @@ def download_model(spec: ModelSpec, cache_dir: str) -> str:
             "Check if the revision (commit/tag/branch) exists."
         )
     except Exception as e:
+        # Check if this is a gated model error (401 Unauthorized or GatedRepoError)
+        error_str = str(e).lower()
+        is_gated_error = (
+            (GatedRepoError is not None and isinstance(e, GatedRepoError))
+            or "401" in error_str
+            or "unauthorized" in error_str
+            or "gated" in error_str
+            or "gatedrepoerror" in error_str
+        )
+
+        if is_gated_error:
+            raise GatedModelError(
+                f"Gated model '{spec.id}' requires HF_TOKEN authentication. "
+                f"Set HF_TOKEN or HUGGINGFACE_HUB_TOKEN environment variable to download."
+            ) from e
+
         raise DownloadError(
             f"Failed to download model '{spec.id}' from '{spec.repo_id}': {e}"
         ) from e
diff --git a/src/model_manager/errors.py b/src/model_manager/errors.py
@@ -31,3 +31,9 @@ class ConfigurationError(ModelManagerError):
     """Raised when configuration is invalid or missing."""
 
     pass
+
+
+class GatedModelError(ModelManagerError):
+    """Raised when attempting to download a gated model without authentication."""
+
+    pass
diff --git a/tools/make/models.mk b/tools/make/models.mk