126 changes: 126 additions & 0 deletions src/backend/base/langflow/api/v1/models.py
@@ -33,6 +33,11 @@
MAX_STRING_LENGTH = 200 # Maximum length for model IDs and provider names
MAX_BATCH_UPDATE_SIZE = 100 # Maximum number of models that can be updated at once

# Module-level set of in-flight HuggingFace background downloads. The event
# loop keeps only weak references to tasks, so holding a strong ref here
# prevents a task from being garbage-collected mid-flight (RUF006); entries
# auto-discard on completion.
_HF_INFLIGHT_DOWNLOADS: set = set()


def get_provider_from_variable_name(variable_name: str) -> str | None:
    """Get provider name from a model provider variable name.
@@ -675,13 +680,64 @@ async def update_enabled_models(
        variable_service, session, current_user, ENABLED_MODELS_VAR, explicitly_enabled_models
    )

    # Side effect: when the user enables a HuggingFace model, eagerly pull
    # the weights into the local Hub cache in the background so the first
    # flow invocation doesn't pay the download latency.
    await _maybe_schedule_huggingface_downloads(updates, session=session, current_user=current_user)

    # Return the updated model status
    return {
        "disabled_models": list(disabled_models),
        "enabled_models": list(explicitly_enabled_models),
    }


async def _maybe_schedule_huggingface_downloads(
    updates: list[ModelStatusUpdate],
    *,
    session: DbSession,
    current_user: CurrentActiveUser,
) -> None:
    """Kick off background ``snapshot_download`` for newly-enabled HF models.

    No-op for non-HuggingFace providers and for toggle-off updates. Failures
    here are logged but never bubble up; a download problem must not block
    the toggle from being saved.
    """
    import asyncio

    hf_targets = [u.model_id for u in updates if u.enabled and u.provider == "HuggingFace"]
    if not hf_targets:
        return

    api_key: str | None = None
    variable_service = get_variable_service()
    if isinstance(variable_service, DatabaseVariableService):
        try:
            api_key = await variable_service.get_variable(
                user_id=current_user.id,
                name="HUGGINGFACEHUB_API_TOKEN",
                field=GENERIC_TYPE,
                session=session,
            )
        except Exception as e:  # noqa: BLE001
            logger.debug("HUGGINGFACEHUB_API_TOKEN not set for user %s: %s", current_user.id, e)

    from lfx.base.models.huggingface_chat_model import download_model

    async def _download(model_id: str) -> None:
        try:
            await asyncio.to_thread(download_model, model_id, api_key=api_key)
            logger.info("HuggingFace model %s downloaded for user %s", model_id, current_user.id)
        except Exception:  # noqa: BLE001
            logger.exception("Background download of HuggingFace model %s failed", model_id)

    for model_id in hf_targets:
        task = asyncio.create_task(_download(model_id))
        _HF_INFLIGHT_DOWNLOADS.add(task)
        task.add_done_callback(_HF_INFLIGHT_DOWNLOADS.discard)
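
Reviewer note: the create_task / add / add_done_callback(discard) triple above is the standard remedy for RUF006, since the event loop holds only weak references to tasks. A minimal standalone sketch of the same pattern (names here are illustrative, not from this PR):

import asyncio

_INFLIGHT: set[asyncio.Task] = set()

async def work(n: int) -> None:
    await asyncio.sleep(0.1)  # stand-in for the real download
    print(f"task {n} done")

def schedule(n: int) -> None:
    task = asyncio.create_task(work(n))
    _INFLIGHT.add(task)  # strong ref so GC can't collect a running task
    task.add_done_callback(_INFLIGHT.discard)  # drop the ref when finished

async def main() -> None:
    for i in range(3):
        schedule(i)
    await asyncio.gather(*_INFLIGHT)  # demo-only: wait so the script exits cleanly

asyncio.run(main())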


class DefaultModelRequest(BaseModel):
    """Request model for setting default model."""

Expand Down Expand Up @@ -868,3 +924,73 @@ async def clear_default_model(
        ) from e

    return {"default_model": None}


# ---------------------------------------------------------------------------
# HuggingFace local model management
# ---------------------------------------------------------------------------


class HuggingFaceDownloadRequest(BaseModel):
    """Body for ``POST /huggingface/download``."""

    model_id: str

    @field_validator("model_id")
    @classmethod
    def _validate_model_id(cls, v: str) -> str:
        if not v or not v.strip():
            msg = "model_id cannot be empty"
            raise ValueError(msg)
        if len(v) > MAX_STRING_LENGTH:
            msg = f"model_id exceeds maximum length of {MAX_STRING_LENGTH}"
            raise ValueError(msg)
        return v.strip()


@router.get("/huggingface/installed", status_code=200)
async def list_huggingface_installed(current_user: CurrentActiveUser) -> dict[str, list[str]]: # noqa: ARG001
"""List HuggingFace models present in the local Hub cache."""
from lfx.base.models.huggingface_chat_model import list_installed_models

return {"installed": list_installed_models()}


@router.post("/huggingface/download", status_code=200)
async def download_huggingface_model(
request: HuggingFaceDownloadRequest,
*,
session: DbSession,
current_user: CurrentActiveUser,
) -> dict[str, str]:
"""Download a HuggingFace model into the local Hub cache.

Reuses the user's saved ``HUGGINGFACEHUB_API_TOKEN`` (if any) to authorize
pulls of gated/private repos. Public repos download without a token.
"""
import asyncio

from lfx.base.models.huggingface_chat_model import download_model

api_key: str | None = None
variable_service = get_variable_service()
if isinstance(variable_service, DatabaseVariableService):
try:
api_key = await variable_service.get_variable(
user_id=current_user.id,
name="HUGGINGFACEHUB_API_TOKEN",
field=GENERIC_TYPE,
session=session,
)
except Exception as e: # noqa: BLE001
logger.debug("HUGGINGFACEHUB_API_TOKEN not configured for user %s: %s", current_user.id, e)

try:
path = await asyncio.to_thread(download_model, request.model_id, api_key=api_key)
except ImportError as e:
raise HTTPException(status_code=500, detail=str(e)) from e
except Exception as e:
logger.exception("Failed to download HuggingFace model %s", request.model_id)
raise HTTPException(status_code=400, detail=f"Failed to download model: {e}") from e

return {"model_id": request.model_id, "path": str(path)}
53 changes: 53 additions & 0 deletions src/backend/base/langflow/main.py
@@ -147,6 +147,39 @@ def warn_about_future_cors_changes(settings):
    )


async def _prefetch_default_huggingface_model() -> None:
    """Best-effort warm-up of the HF Hub cache for the bundled default model.

    **Opt-in** via ``LANGFLOW_PREFETCH_HF_DEFAULT=true`` (also accepts
    ``1``/``yes``). Default is OFF because the underlying
    ``huggingface_hub.snapshot_download`` path has triggered worker SIGSEGV
    on macOS arm64 + Python 3.12, and a crashing prefetch combined with
    uvicorn auto-reload produces a server crash loop.

    When enabled, runs as a background task during lifespan startup. Uses
    ``huggingface_hub.snapshot_download`` (no torch import) and downloads
    only the weights / tokenizer files we actually need.
    """
    if os.environ.get("LANGFLOW_PREFETCH_HF_DEFAULT", "").lower() not in {"1", "true", "yes"}:
        return
    try:
        from lfx.base.models.huggingface_chat_model import download_model
        from lfx.base.models.huggingface_constants import DEFAULT_HUGGINGFACE_MODEL
    except ImportError as exc:
        await logger.adebug(f"HF default-model prefetch skipped (imports unavailable): {exc}")
        return

    api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    try:
        await logger.adebug(f"Prefetching default HuggingFace model {DEFAULT_HUGGINGFACE_MODEL} into cache")
        await asyncio.to_thread(download_model, DEFAULT_HUGGINGFACE_MODEL, api_key=api_key)
        await logger.adebug(f"HuggingFace default model {DEFAULT_HUGGINGFACE_MODEL} ready in cache")
    except Exception as exc:  # noqa: BLE001
        # A failed prefetch must never block startup. The first flow run will
        # retry the download on demand.
        await logger.awarning(f"HF default-model prefetch failed (will retry on first use): {exc}")
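
Reviewer note: since env-var gates are easy to get wrong, here is a tiny standalone sketch of the same truthy check the function applies (helper name is illustrative, not from this PR):

import os

_TRUTHY = {"1", "true", "yes"}

def prefetch_enabled() -> bool:
    # Mirrors the gate in _prefetch_default_huggingface_model: anything
    # outside the truthy set (including unset) leaves the prefetch off.
    return os.environ.get("LANGFLOW_PREFETCH_HF_DEFAULT", "").lower() in _TRUTHY

os.environ["LANGFLOW_PREFETCH_HF_DEFAULT"] = "YES"
assert prefetch_enabled()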


def get_lifespan(*, fix_migration=False, version=None):
    initialize_settings_service()
    telemetry_service = get_telemetry_service()
@@ -166,6 +199,7 @@ async def lifespan(_app: FastAPI):
        temp_dirs: list[TemporaryDirectory] = []
        sync_flows_from_fs_task = None
        mcp_init_task = None
        hf_prefetch_task = None

        try:
            start_time = asyncio.get_event_loop().time()
@@ -309,6 +343,22 @@ async def delayed_init_mcp_servers():
            # Allows the server to start first to avoid race conditions with MCP Server startup
            mcp_init_task = asyncio.create_task(delayed_init_mcp_servers())

            # Background pre-download of the bundled HuggingFace model.
            #
            # Without it, the *first* invocation of a flow that uses the
            # local HF provider would block the request thread for tens of
            # seconds while transformers pulls ~720MB to ~/.cache/huggingface.
            # Pre-downloading at startup keeps the cache warm so the first
            # real inference only pays the load+inference cost.
            #
            # Opt-in via LANGFLOW_PREFETCH_HF_DEFAULT=true; the task gates
            # itself on that flag (see _prefetch_default_huggingface_model
            # for why it defaults to off) and its failures are logged but
            # never block startup.
            hf_prefetch_task = asyncio.create_task(_prefetch_default_huggingface_model())

            # v1 and project MCP server context managers
            from langflow.api.v1.mcp import start_streamable_http_manager
            from langflow.api.v1.mcp_projects import start_project_task_group
@@ -383,6 +433,9 @@ async def delayed_init_mcp_servers():
            if mcp_init_task and not mcp_init_task.done():
                mcp_init_task.cancel()
                tasks_to_cancel.append(mcp_init_task)
            if hf_prefetch_task and not hf_prefetch_task.done():
                hf_prefetch_task.cancel()
                tasks_to_cancel.append(hf_prefetch_task)
            if tasks_to_cancel:
                # Wait for all tasks to complete, capturing exceptions
                results = await asyncio.gather(*tasks_to_cancel, return_exceptions=True)
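
Reviewer note: the cancellation block above follows the usual cancel-then-gather shutdown shape. A distilled, standalone sketch of that pattern (function and names are illustrative, not from this PR):

import asyncio

async def cancel_pending(pending: list[asyncio.Task | None]) -> None:
    to_cancel = [t for t in pending if t is not None and not t.done()]
    for t in to_cancel:
        t.cancel()
    # return_exceptions=True converts CancelledError (and any real failure)
    # into a result, so one bad task cannot abort the rest of shutdown.
    results = await asyncio.gather(*to_cancel, return_exceptions=True)
    for task, result in zip(to_cancel, results, strict=True):
        if isinstance(result, BaseException) and not isinstance(result, asyncio.CancelledError):
            print(f"{task.get_name()} failed during shutdown: {result!r}")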
1 change: 1 addition & 0 deletions src/backend/base/pyproject.toml
@@ -366,6 +366,7 @@ ibm-watsonx-clients = [
]

complete = [
    "langflow-base[local]",
    "langflow-base[couchbase]",
    "langflow-base[cassandra]",
    "langflow-base[clickhouse]",
@@ -86,6 +86,7 @@ const getProviderIcon = (providerName: string): string => {
Ollama: "Ollama",
"IBM WatsonX": "IBM",
"IBM watsonx.ai": "IBM",
HuggingFace: "HuggingFace",
};

  return iconMap[providerName] || "Bot";