126 changes: 126 additions & 0 deletions src/backend/base/langflow/api/v1/models.py
@@ -33,6 +33,11 @@
MAX_STRING_LENGTH = 200 # Maximum length for model IDs and provider names
MAX_BATCH_UPDATE_SIZE = 100 # Maximum number of models that can be updated at once

# Module-level set of in-flight HuggingFace background downloads. The event
# loop keeps only weak references to tasks, so holding a strong ref here
# prevents a task from being garbage-collected mid-flight (RUF006); entries
# auto-discard on completion.
_HF_INFLIGHT_DOWNLOADS: set = set()


def get_provider_from_variable_name(variable_name: str) -> str | None:
    """Get provider name from a model provider variable name.
@@ -675,13 +680,64 @@ async def update_enabled_models(
        variable_service, session, current_user, ENABLED_MODELS_VAR, explicitly_enabled_models
    )

    # Side effect: when the user enables a HuggingFace model, eagerly pull
    # the weights into the local Hub cache in the background so the first
    # flow invocation doesn't pay the download latency.
    await _maybe_schedule_huggingface_downloads(updates, session=session, current_user=current_user)

    # Return the updated model status
    return {
        "disabled_models": list(disabled_models),
        "enabled_models": list(explicitly_enabled_models),
    }


async def _maybe_schedule_huggingface_downloads(
    updates: list[ModelStatusUpdate],
    *,
    session: DbSession,
    current_user: CurrentActiveUser,
) -> None:
    """Kick off background ``snapshot_download`` for newly-enabled HF models.

    No-op for non-HuggingFace providers and for toggle-off updates. Failures
    here are logged but never bubble up; a download problem must not block
    the toggle from being saved.
    """
    import asyncio

    hf_targets = [u.model_id for u in updates if u.enabled and u.provider == "HuggingFace"]
    if not hf_targets:
        return

    api_key: str | None = None
    variable_service = get_variable_service()
    if isinstance(variable_service, DatabaseVariableService):
        try:
            api_key = await variable_service.get_variable(
                user_id=current_user.id,
                name="HUGGINGFACEHUB_API_TOKEN",
                field=GENERIC_TYPE,
                session=session,
            )
        except Exception as e:  # noqa: BLE001
            logger.debug("HUGGINGFACEHUB_API_TOKEN not set for user %s: %s", current_user.id, e)

    from lfx.base.models.huggingface_chat_model import download_model

    async def _download(model_id: str) -> None:
        try:
            await asyncio.to_thread(download_model, model_id, api_key=api_key)
            logger.info("HuggingFace model %s downloaded for user %s", model_id, current_user.id)
        except Exception:  # noqa: BLE001
            logger.exception("Background download of HuggingFace model %s failed", model_id)

    for model_id in hf_targets:
        task = asyncio.create_task(_download(model_id))
        _HF_INFLIGHT_DOWNLOADS.add(task)
        task.add_done_callback(_HF_INFLIGHT_DOWNLOADS.discard)
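
Reviewer note: the create_task / add / add_done_callback(discard) triple above is the standard remedy for RUF006, since the event loop holds only weak references to tasks. A minimal standalone sketch of the same pattern (names here are illustrative, not from this PR):

import asyncio

_INFLIGHT: set[asyncio.Task] = set()

async def work(n: int) -> None:
    await asyncio.sleep(0.1)  # stand-in for the real download
    print(f"task {n} done")

def schedule(n: int) -> None:
    task = asyncio.create_task(work(n))
    _INFLIGHT.add(task)  # strong ref so GC can't collect a running task
    task.add_done_callback(_INFLIGHT.discard)  # drop the ref when finished

async def main() -> None:
    for i in range(3):
        schedule(i)
    await asyncio.gather(*_INFLIGHT)  # demo-only: wait so the script exits cleanly

asyncio.run(main())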


class DefaultModelRequest(BaseModel):
    """Request model for setting default model."""

Expand Down Expand Up @@ -868,3 +924,73 @@ async def clear_default_model(
        ) from e

    return {"default_model": None}


# ---------------------------------------------------------------------------
# HuggingFace local model management
# ---------------------------------------------------------------------------


class HuggingFaceDownloadRequest(BaseModel):
    """Body for ``POST /huggingface/download``."""

    model_id: str

    @field_validator("model_id")
    @classmethod
    def _validate_model_id(cls, v: str) -> str:
        if not v or not v.strip():
            msg = "model_id cannot be empty"
            raise ValueError(msg)
        if len(v) > MAX_STRING_LENGTH:
            msg = f"model_id exceeds maximum length of {MAX_STRING_LENGTH}"
            raise ValueError(msg)
        return v.strip()


@router.get("/huggingface/installed", status_code=200)
async def list_huggingface_installed(current_user: CurrentActiveUser) -> dict[str, list[str]]: # noqa: ARG001
"""List HuggingFace models present in the local Hub cache."""
from lfx.base.models.huggingface_chat_model import list_installed_models

return {"installed": list_installed_models()}


@router.post("/huggingface/download", status_code=200)
async def download_huggingface_model(
request: HuggingFaceDownloadRequest,
*,
session: DbSession,
current_user: CurrentActiveUser,
) -> dict[str, str]:
"""Download a HuggingFace model into the local Hub cache.

Reuses the user's saved ``HUGGINGFACEHUB_API_TOKEN`` (if any) to authorize
pulls of gated/private repos. Public repos download without a token.
"""
import asyncio

from lfx.base.models.huggingface_chat_model import download_model

api_key: str | None = None
variable_service = get_variable_service()
if isinstance(variable_service, DatabaseVariableService):
try:
api_key = await variable_service.get_variable(
user_id=current_user.id,
name="HUGGINGFACEHUB_API_TOKEN",
field=GENERIC_TYPE,
session=session,
)
except Exception as e: # noqa: BLE001
logger.debug("HUGGINGFACEHUB_API_TOKEN not configured for user %s: %s", current_user.id, e)

try:
path = await asyncio.to_thread(download_model, request.model_id, api_key=api_key)
except ImportError as e:
raise HTTPException(status_code=500, detail=str(e)) from e
except Exception as e:
logger.exception("Failed to download HuggingFace model %s", request.model_id)
raise HTTPException(status_code=400, detail=f"Failed to download model: {e}") from e

return {"model_id": request.model_id, "path": str(path)}
53 changes: 53 additions & 0 deletions src/backend/base/langflow/main.py
@@ -147,6 +147,39 @@ def warn_about_future_cors_changes(settings):
    )


async def _prefetch_default_huggingface_model() -> None:
    """Best-effort warm-up of the HF Hub cache for the bundled default model.

    **Opt-in** via ``LANGFLOW_PREFETCH_HF_DEFAULT=true`` (also accepts
    ``1``/``yes``). Default is OFF because the underlying
    ``huggingface_hub.snapshot_download`` path has triggered worker SIGSEGV
    on macOS arm64 + Python 3.12, and a crashing prefetch combined with
    uvicorn auto-reload produces a server crash loop.

    When enabled, runs as a background task during lifespan startup. Uses
    ``huggingface_hub.snapshot_download`` (no torch import) and downloads
    only the weights / tokenizer files we actually need.
    """
    if os.environ.get("LANGFLOW_PREFETCH_HF_DEFAULT", "").lower() not in {"1", "true", "yes"}:
        return
    try:
        from lfx.base.models.huggingface_chat_model import download_model
        from lfx.base.models.huggingface_constants import DEFAULT_HUGGINGFACE_MODEL
    except ImportError as exc:
        await logger.adebug(f"HF default-model prefetch skipped (imports unavailable): {exc}")
        return

    api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    try:
        await logger.adebug(f"Prefetching default HuggingFace model {DEFAULT_HUGGINGFACE_MODEL} into cache")
        await asyncio.to_thread(download_model, DEFAULT_HUGGINGFACE_MODEL, api_key=api_key)
        await logger.adebug(f"HuggingFace default model {DEFAULT_HUGGINGFACE_MODEL} ready in cache")
    except Exception as exc:  # noqa: BLE001
        # A failed prefetch must never block startup. The first flow run will
        # retry the download on demand.
        await logger.awarning(f"HF default-model prefetch failed (will retry on first use): {exc}")
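
Reviewer note: since env-var gates are easy to get wrong, here is a tiny standalone sketch of the same truthy check the function applies (helper name is illustrative, not from this PR):

import os

_TRUTHY = {"1", "true", "yes"}

def prefetch_enabled() -> bool:
    # Mirrors the gate in _prefetch_default_huggingface_model: anything
    # outside the truthy set (including unset) leaves the prefetch off.
    return os.environ.get("LANGFLOW_PREFETCH_HF_DEFAULT", "").lower() in _TRUTHY

os.environ["LANGFLOW_PREFETCH_HF_DEFAULT"] = "YES"
assert prefetch_enabled()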


def get_lifespan(*, fix_migration=False, version=None):
    initialize_settings_service()
    telemetry_service = get_telemetry_service()
@@ -166,6 +199,7 @@ async def lifespan(_app: FastAPI):
        temp_dirs: list[TemporaryDirectory] = []
        sync_flows_from_fs_task = None
        mcp_init_task = None
        hf_prefetch_task = None

        try:
            start_time = asyncio.get_event_loop().time()
@@ -309,6 +343,22 @@ async def delayed_init_mcp_servers():
            # Allows the server to start first to avoid race conditions with MCP Server startup
            mcp_init_task = asyncio.create_task(delayed_init_mcp_servers())

            # Background pre-download of the bundled HuggingFace model.
            #
            # Without it, the *first* invocation of a flow that uses the
            # local HF provider would block the request thread for tens of
            # seconds while transformers pulls ~720MB to ~/.cache/huggingface.
            # Pre-downloading at startup keeps the cache warm so the first
            # real inference only pays the load+inference cost.
            #
            # Opt-in via LANGFLOW_PREFETCH_HF_DEFAULT=true; the task gates
            # itself on that flag (see _prefetch_default_huggingface_model
            # for why it defaults to off) and its failures are logged but
            # never block startup.
            hf_prefetch_task = asyncio.create_task(_prefetch_default_huggingface_model())

            # v1 and project MCP server context managers
            from langflow.api.v1.mcp import start_streamable_http_manager
            from langflow.api.v1.mcp_projects import start_project_task_group
@@ -383,6 +433,9 @@ async def delayed_init_mcp_servers():
            if mcp_init_task and not mcp_init_task.done():
                mcp_init_task.cancel()
                tasks_to_cancel.append(mcp_init_task)
            if hf_prefetch_task and not hf_prefetch_task.done():
                hf_prefetch_task.cancel()
                tasks_to_cancel.append(hf_prefetch_task)
            if tasks_to_cancel:
                # Wait for all tasks to complete, capturing exceptions
                results = await asyncio.gather(*tasks_to_cancel, return_exceptions=True)
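
Reviewer note: the cancellation block above follows the usual cancel-then-gather shutdown shape. A distilled, standalone sketch of that pattern (function and names are illustrative, not from this PR):

import asyncio

async def cancel_pending(pending: list[asyncio.Task | None]) -> None:
    to_cancel = [t for t in pending if t is not None and not t.done()]
    for t in to_cancel:
        t.cancel()
    # return_exceptions=True converts CancelledError (and any real failure)
    # into a result, so one bad task cannot abort the rest of shutdown.
    results = await asyncio.gather(*to_cancel, return_exceptions=True)
    for task, result in zip(to_cancel, results, strict=True):
        if isinstance(result, BaseException) and not isinstance(result, asyncio.CancelledError):
            print(f"{task.get_name()} failed during shutdown: {result!r}")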
1 change: 1 addition & 0 deletions src/backend/base/pyproject.toml
@@ -366,6 +366,7 @@ ibm-watsonx-clients = [
]

complete = [
    "langflow-base[local]",
    "langflow-base[couchbase]",
    "langflow-base[cassandra]",
    "langflow-base[clickhouse]",
@@ -86,6 +86,7 @@ const getProviderIcon = (providerName: string): string => {
Ollama: "Ollama",
"IBM WatsonX": "IBM",
"IBM watsonx.ai": "IBM",
HuggingFace: "HuggingFace",
};

  return iconMap[providerName] || "Bot";