valentinfrlch · mohit-twelvelabs · Jun 25, 2026 · Jun 28, 2026
diff --git a/README.md b/README.md
@@ -36,7 +36,7 @@
 </p>
 
 ## Features
-- Supports OpenRouter, OpenAI, Anthropic, Google Gemini, AWS Bedrock, Azure, Groq, [Ollama](https://ollama.com/), [Open WebUI](https://github.com/open-webui/open-webui), [LocalAI](https://github.com/mudler/LocalAI) and any provider with OpenAI compatible endpoints.
+- Supports OpenRouter, OpenAI, Anthropic, Google Gemini, AWS Bedrock, Azure, Groq, [TwelveLabs](https://twelvelabs.io) (Pegasus video understanding), [Ollama](https://ollama.com/), [Open WebUI](https://github.com/open-webui/open-webui), [LocalAI](https://github.com/mudler/LocalAI) and any provider with OpenAI compatible endpoints.
 - Answers questions and provides descriptions of images, video files, live camera feeds, and Frigate events based on your prompt.
 - Remembers people, pets and objects
 - Keeps a timeline of camera events, so you can display them on your dashboard or ask Assist about them.

diff --git a/custom_components/llmvision/config_flow.py b/custom_components/llmvision/config_flow.py
@@ -16,6 +16,7 @@
     Ollama,
     AWSBedrock,
     Mistral,
+    TwelveLabs,
 )
 from .const import (
     DOMAIN,
@@ -56,6 +57,7 @@
     DEFAULT_OPENWEBUI_MODEL,
     DEFAULT_OPENROUTER_MODEL,
     DEFAULT_MISTRAL_MODEL,
+    DEFAULT_TWELVELABS_MODEL,
     ENDPOINT_OPENWEBUI,
     ENDPOINT_AZURE,
     ENDPOINT_OPENROUTER,
@@ -90,6 +92,7 @@ async def handle_provider(self, provider):
             "OpenWebUI": self.async_step_openwebui,
             "OpenRouter": self.async_step_openrouter,
             "Mistral": self.async_step_mistral,
+            "TwelveLabs": self.async_step_twelvelabs,
         }
 
         step_method = provider_steps.get(provider)
@@ -129,6 +132,7 @@ async def async_step_user(self, user_input=None):
                                 "OpenWebUI",
                                 "OpenRouter",
                                 "Mistral",
+                                "TwelveLabs",
                                 "Custom OpenAI",
                             ],
                             "mode": "dropdown",
@@ -1729,6 +1733,88 @@ async def async_step_mistral(self, user_input=None):
             data_schema=data_schema,
         )
 
+    async def async_step_twelvelabs(self, user_input=None):
+        """Config step: collect and validate the TwelveLabs key and model.
+
+        Re-shows the form when validation fails; otherwise creates the entry,
+        or updates and reloads it when the flow was started to reconfigure.
+        """
+        data_schema = vol.Schema(
+            {
+                vol.Optional("connection_section"): section(
+                    vol.Schema(
+                        {
+                            vol.Required(CONF_API_KEY): selector(
+                                {"text": {"type": "password"}}
+                            )
+                        }
+                    ),
+                    {"collapsed": False},
+                ),
+                vol.Optional("model_section"): section(
+                    vol.Schema(
+                        {
+                            vol.Required(
+                                CONF_DEFAULT_MODEL, default=DEFAULT_TWELVELABS_MODEL
+                            ): str,
+                            vol.Optional(CONF_TEMPERATURE, default=0.2): selector(
+                                {
+                                    "number": {
+                                        "min": 0,
+                                        "max": 1,
+                                        "step": 0.1,
+                                        "mode": "slider",
+                                    }
+                                }
+                            ),
+                        }
+                    ),
+                    {"collapsed": False},
+                ),
+            }
+        )
+
+        if self.source == config_entries.SOURCE_RECONFIGURE:
+            self.init_info = self._get_reconfigure_entry().data
+            suggested = {
+                "connection_section": {CONF_API_KEY: self.init_info.get(CONF_API_KEY)},
+                "model_section": {
+                    CONF_DEFAULT_MODEL: self.init_info.get(
+                        CONF_DEFAULT_MODEL, DEFAULT_TWELVELABS_MODEL
+                    ),
+                    CONF_TEMPERATURE: self.init_info.get(CONF_TEMPERATURE, 0.2),
+                },
+            }
+            data_schema = self.add_suggested_values_to_schema(data_schema, suggested)
+
+        if user_input is None:
+            return self.async_show_form(step_id="twelvelabs", data_schema=data_schema)
+
+        # Values arrive grouped by section; flatten them, then attach the
+        # provider name (it is not a form field) exactly once.
+        user_input = flatten_dict(user_input)
+        user_input[CONF_PROVIDER] = self.init_info[CONF_PROVIDER]
+        try:
+            await TwelveLabs(
+                self.hass,
+                api_key=user_input[CONF_API_KEY],
+                model=user_input[CONF_DEFAULT_MODEL],
+            ).validate()
+        except ServiceValidationError as e:
+            _LOGGER.error("Validation failed: %s", e)
+            return self.async_show_form(
+                step_id="twelvelabs",
+                data_schema=data_schema,
+                errors={"base": "empty_api_key"},
+            )
+
+        if self.source == config_entries.SOURCE_RECONFIGURE:
+            return self.async_update_reload_and_abort(
+                self._get_reconfigure_entry(),
+                data_updates=user_input,
+            )
+        return self.async_create_entry(title="TwelveLabs Pegasus", data=user_input)
+
     async def async_step_reconfigure(self, user_input):
         data = self._get_reconfigure_entry().data
         provider = data[CONF_PROVIDER]

diff --git a/custom_components/llmvision/const.py b/custom_components/llmvision/const.py
@@ -85,6 +85,9 @@
 ERROR_GROQ_MULTIPLE_IMAGES = "Groq does not support videos or streams"
 ERROR_NO_IMAGE_INPUT = "No image input provided"
 ERROR_HANDSHAKE_FAILED = "Connection could not be established"
+ERROR_TWELVELABS_ENCODE_FAILED = (
+    "Could not encode frames into a video clip for TwelveLabs Pegasus"
+)
 
 # Versions
 VERSION_ANTHROPIC = "2023-06-01"  # https://docs.anthropic.com/en/api/versioning
@@ -147,6 +150,7 @@
 DEFAULT_OPENWEBUI_MODEL = "gemma3:4b"
 DEFAULT_OPENROUTER_MODEL = "google/gemma-3-4b-it:free"
 DEFAULT_MISTRAL_MODEL = "pixtral-12b-2409"
+DEFAULT_TWELVELABS_MODEL = "pegasus1.5"
 
 DEFAULT_SUMMARY_PROMPT = "Provide a brief summary for the following titles. Focus on the key actions or changes that occurred over time and avoid unnecessary details or subjective interpretations. The summary should be concise, objective, and relevant to the content of the images. Keep the summary under 50 words and ensure it captures the main events or activities described in the descriptions. Here are the descriptions:\n "
 
@@ -161,3 +165,4 @@
 ENDPOINT_AZURE = "{base_url}openai/deployments/{deployment}/chat/completions?api-version={api_version}"
 ENDPOINT_OPENROUTER = "https://openrouter.ai/api/v1/chat/completions"
 ENDPOINT_MISTRAL = "https://api.mistral.ai/v1/chat/completions"
+ENDPOINT_TWELVELABS = "https://api.twelvelabs.io/v1.3/analyze"
diff --git a/custom_components/llmvision/providers.py b/custom_components/llmvision/providers.py
@@ -11,6 +11,9 @@
 import re
 import json
 import base64
+import asyncio
+import os
+import tempfile
 from .const import (
     DOMAIN,
     CONF_API_KEY,
@@ -37,9 +40,11 @@
     ENDPOINT_GROQ,
     ENDPOINT_OPENROUTER,
     ENDPOINT_MISTRAL,
+    ENDPOINT_TWELVELABS,
     ERROR_NOT_CONFIGURED,
     ERROR_GROQ_MULTIPLE_IMAGES,
     ERROR_NO_IMAGE_INPUT,
+    ERROR_TWELVELABS_ENCODE_FAILED,
     DEFAULT_OPENAI_MODEL,
     DEFAULT_ANTHROPIC_MODEL,
     DEFAULT_AZURE_MODEL,
@@ -52,6 +57,7 @@
     DEFAULT_OPENWEBUI_MODEL,
     DEFAULT_OPENROUTER_MODEL,
     DEFAULT_MISTRAL_MODEL,
+    DEFAULT_TWELVELABS_MODEL,
     CONF_KEEP_ALIVE,
     CONF_CONTEXT_WINDOW,
     CONF_TEMPERATURE,
@@ -133,6 +139,7 @@ def get_default_model(self, provider):
             "Open WebUI": DEFAULT_OPENWEBUI_MODEL,
             "OpenRouter": DEFAULT_OPENROUTER_MODEL,
             "Mistral": DEFAULT_MISTRAL_MODEL,
+            "TwelveLabs": DEFAULT_TWELVELABS_MODEL,
         }.get(provider_name)
 
     def validate(self, call: Any) -> None | ServiceValidationError:
@@ -2070,6 +2077,155 @@ def supports_structured_output(self) -> bool:
         return True
 
 
+class TwelveLabs(Provider):
+    """TwelveLabs Pegasus video-understanding provider.
+
+    Unlike the other providers, Pegasus is a video model: it reasons over a
+    short clip rather than independent stills. LLM Vision always decomposes its
+    inputs (videos, camera snapshots, Frigate events) into base64 JPEG frames
+    before a provider is called, so this provider re-encodes those frames back
+    into a small MP4 clip (muxed via a temp file with the ffmpeg binary the
+    integration already requires) and sends it to the Pegasus `/analyze`
+    endpoint. This lets Pegasus pick up on motion across the keyframes instead
+    of treating them as unrelated images.
+    """
+
+    def __init__(self, hass: HomeAssistant, api_key: str, model: str):
+        """Forward the Home Assistant handle, API key and model to `Provider`."""
+        super().__init__(hass, api_key, model)
+
+    def _generate_headers(self) -> dict:
+        """Return the JSON request headers carrying the API key in `x-api-key`."""
+        return {"x-api-key": self.api_key, "content-type": "application/json"}
+
+    async def _frames_to_mp4_base64(self, base64_images: list, fps: int = 1) -> str:
+        """Encode an ordered list of base64 JPEG frames into a base64 MP4 clip.
+
+        Pegasus needs a real (seekable) MP4 container, so we mux through a
+        temporary file rather than a pipe. ffmpeg is already a runtime
+        requirement of this integration (see media_handlers).
+
+        Raises ServiceValidationError when there are no frames, a frame is not
+        valid base64, or ffmpeg cannot be started or fails to produce a clip.
+        """
+        if not base64_images:
+            raise ServiceValidationError(ERROR_NO_IMAGE_INPUT)
+
+        try:
+            jpeg_bytes = b"".join(base64.b64decode(img) for img in base64_images)
+        except (ValueError, TypeError) as e:
+            _LOGGER.error(f"Failed to decode frames for TwelveLabs: {e}")
+            raise ServiceValidationError(ERROR_TWELVELABS_ENCODE_FAILED) from e
+
+        # delete=False: only the path is needed; ffmpeg overwrites it (-y).
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
+            output_path = tmp.name
+
+        try:
+            ffmpeg_cmd = ["ffmpeg", "-y", "-loglevel", "error"]
+            ffmpeg_cmd += ["-f", "image2pipe", "-vcodec", "mjpeg"]
+            ffmpeg_cmd += ["-framerate", str(fps), "-i", "-"]
+            # libx264 with yuv420p rejects odd frame sizes; pad up to even.
+            ffmpeg_cmd += ["-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2"]
+            ffmpeg_cmd += ["-c:v", "libx264", "-pix_fmt", "yuv420p"]
+            ffmpeg_cmd += ["-movflags", "+faststart", output_path]
+            try:
+                process = await asyncio.create_subprocess_exec(
+                    *ffmpeg_cmd,
+                    stdin=asyncio.subprocess.PIPE,
+                    stdout=asyncio.subprocess.DEVNULL,
+                    stderr=asyncio.subprocess.PIPE,
+                )
+            except OSError as e:
+                # e.g. the ffmpeg binary is missing or not executable
+                _LOGGER.error(f"Could not start ffmpeg for TwelveLabs: {e}")
+                raise ServiceValidationError(ERROR_TWELVELABS_ENCODE_FAILED) from e
+            _, stderr = await process.communicate(input=jpeg_bytes)
+            if process.returncode != 0:
+                _LOGGER.error(
+                    f"ffmpeg failed to build clip for TwelveLabs: "
+                    f"{(stderr or b'').decode(errors='ignore')[:300]}"
+                )
+                raise ServiceValidationError(ERROR_TWELVELABS_ENCODE_FAILED)
+
+            with open(output_path, "rb") as f:
+                mp4_bytes = f.read()
+        finally:
+            try:
+                os.unlink(output_path)
+            except OSError:
+                pass
+
+        if not mp4_bytes:
+            raise ServiceValidationError(ERROR_TWELVELABS_ENCODE_FAILED)
+
+        return base64.b64encode(mp4_bytes).decode("utf-8")
+
+    async def _make_request(self, data: dict) -> str:
+        """POST `data` to the Pegasus endpoint and return the stripped reply text."""
+        headers = self._generate_headers()
+        response = await self._post(url=ENDPOINT_TWELVELABS, headers=headers, data=data)
+        if not isinstance(response, dict):
+            raise ServiceValidationError("invalid_response")
+        # Pegasus may return None on an error finish_reason; reject non-text data.
+        response_text = response.get("data")
+        if not isinstance(response_text, str):
+            raise ServiceValidationError("invalid_response")
+        return response_text.strip()
+
+    async def _prepare_vision_data(self, call: Any) -> dict:
+        """Build the `/analyze` payload: encoded clip, prompt and sampling params."""
+        # Pegasus's max_tokens has a model minimum (512); clamp to stay valid.
+        max_tokens = max(int(getattr(call, "max_tokens", 512) or 512), 512)
+        clip_base64 = await self._frames_to_mp4_base64(call.base64_images)
+        prompt = f"{self._get_system_prompt()}\n\n{call.message}"
+        return {
+            "model_name": self.model,
+            "video": {"type": "base64_string", "base64_string": clip_base64},
+            "prompt": prompt,
+            "max_tokens": max_tokens,
+            "temperature": self._get_default_parameters(call).get("temperature"),
+            "stream": False,
+        }
+
+    async def _prepare_text_data(self, call: Any) -> dict:
+        # Title generation operates on text only; Pegasus requires a video, so
+        # this path is not used (see vision_request / title_request overrides).
+        raise ServiceValidationError("invalid_provider")
+
+    async def vision_request(self, call: Any) -> str:
+        """Encode the call's frames into a clip and return Pegasus's answer."""
+        data = await self._prepare_vision_data(call)
+        return await self._make_request(data)
+
+    async def title_request(self, call: Any) -> str:
+        """Return a fixed placeholder title; `call` is not inspected."""
+        # Pegasus is multimodal-in/text-out only; it cannot summarise raw text.
+        return "Event Detected"
+
+    async def validate(self) -> None | ServiceValidationError:
+        """Raise ServiceValidationError("empty_api_key") if the key is unusable."""
+        if not self.api_key:
+            raise ServiceValidationError("empty_api_key")
+        # Validate the key with a cheap, well-formed request. A missing video
+        # yields a 400 with a parameter error (key accepted); an invalid key
+        # yields 401. Either non-auth response confirms the key is usable.
+        headers = self._generate_headers()
+        data = {"model_name": self.model, "prompt": "Hi", "max_tokens": 512}
+        try:
+            await self._post(url=ENDPOINT_TWELVELABS, headers=headers, data=data)
+        except ServiceValidationError as e:
+            message = str(e).lower()
+            if (
+                ("api" in message and "key" in message)
+                or "auth" in message
+                or "401" in message
+            ):
+                raise ServiceValidationError("empty_api_key") from e
+            # A parameter/validation error means the key authenticated fine.
+            return None
+
+
 class ProviderFactory:
     """
     Factory to create provider instances from a provider name and config
@@ -2192,4 +2348,9 @@ def create(
                 model=model,
             )
 
+        if provider_name == "TwelveLabs":
+            return TwelveLabs(
+                hass, api_key=cast(str, config.get(CONF_API_KEY) or ""), model=model
+            )
+
         raise ServiceValidationError("invalid_provider")
diff --git a/custom_components/llmvision/strings.json b/custom_components/llmvision/strings.json
@@ -389,6 +389,34 @@
                     }
                 }
             },
+            "twelvelabs": {
+                "title": "Configure TwelveLabs Pegasus",
+                "description": "Provide a valid TwelveLabs API key from the TwelveLabs dashboard.",
+                "sections": {
+                    "connection_section": {
+                        "name": "Connection",
+                        "description": "TwelveLabs authentication",
+                        "data": {
+                            "api_key": "API key"
+                        },
+                        "data_description": {
+                            "api_key": "Your TwelveLabs API key from the TwelveLabs dashboard."
+                        }
+                    },
+                    "model_section": {
+                        "name": "Model",
+                        "description": "Set default model parameters",
+                        "data": {
+                            "default_model": "Default model",
+                            "temperature": "Temperature"
+                        },
+                        "data_description": {
+                            "default_model": "The Pegasus model to use (for example pegasus1.5). Pegasus analyzes the captured frames as a short video clip.",
+                            "temperature": "Controls the randomness of the output. Lower values make the output more deterministic."
+                        }
+                    }
+                }
+            },
             "settings": {
                 "title": "Settings",
                 "description": "Configure the LLM Vision integration. This entry is required before setting up other providers. If you wish to use the default settings, just press 'Submit'.",
@@ -461,4 +489,4 @@
             "reconfigure_successful": "Saved settings successfully"
         }
     }
-}
+}