diff --git a/server/api.py b/server/api.py index a7af2e8..a47dd7c 100644 --- a/server/api.py +++ b/server/api.py @@ -1,6 +1,6 @@ from fastapi import FastAPI, HTTPException -from .schemas import ChatMessage, ChatCompletionRequest, StartRequest, downloadRequest +from .schemas import ChatMessage, ChatCompletionRequest, StartRequest, downloadRequest, ResponseRequest from .config import SYSTEM_PROMPT import logging import sys @@ -79,3 +79,14 @@ async def create_chat_completion(request: ChatCompletionRequest): ) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/v1/responses") +async def create_response(request: ResponseRequest): + """Create a non-streaming completion response.""" + try: + response = await runtime.backend.generate_response(request) + return response + except Exception as e: + logger.exception("Error in generate_response") + raise HTTPException(status_code=500, detail=str(e)) from e diff --git a/server/backend/mlx.py b/server/backend/mlx_backend.py similarity index 66% rename from server/backend/mlx.py rename to server/backend/mlx_backend.py index 9133694..23d9c41 100644 --- a/server/backend/mlx.py +++ b/server/backend/mlx_backend.py @@ -1,10 +1,11 @@ from .mlx_runner import MLXRunner from ..cache_utils import get_model_path from fastapi import HTTPException -from ..schemas import ChatMessage, ChatCompletionRequest, downloadRequest +from ..schemas import ChatMessage, ChatCompletionRequest, downloadRequest, ResponseRequest from ..hf_downloader import pull_model import logging +import asyncio import json import time import uuid @@ -114,6 +115,15 @@ async def generate_chat_stream( # Stream tokens try: + json_schema = None + if request.response_format: + if request.response_format.get("type") == "json_schema": + schema_info = request.response_format.get("json_schema", {}) + json_schema = json.dumps(schema_info.get("schema", {})) + elif request.response_format.get("type") == "json_object": + # Fallback for json_object 
type + json_schema = "{}" + for token in runner.generate_streaming( prompt=prompt, max_tokens=runner.get_effective_max_tokens( @@ -124,6 +134,7 @@ async def generate_chat_stream( repetition_penalty=request.repetition_penalty, use_chat_template=False, # Already applied in _format_conversation use_chat_stop_tokens=False, # Server mode shouldn't stop on chat markers + json_schema=json_schema, ): chunk_response = { "id": completion_id, @@ -168,6 +179,79 @@ async def generate_chat_stream( yield f"data: {json.dumps(final_response)}\n\n" yield "data: [DONE]\n\n" + +async def generate_response(request: ResponseRequest) -> Dict[str, Any]: + """Generate complete non-streaming chat completion response.""" + completion_id = f"chatcmpl-{uuid.uuid4()}" + created = int(time.time()) + runner = get_or_load_model(request.model) + + # Convert messages to dict format for runner + message_dicts = format_chat_messages_for_runner(request.messages) + + # Let the runner format with chat templates + prompt = runner._format_conversation(message_dicts, use_chat_template=True) + + json_schema = None + if request.response_format: + if request.response_format.get("type") == "json_schema": + schema_info = request.response_format.get("json_schema", {}) + json_schema = json.dumps(schema_info.get("schema", {})) + elif request.response_format.get("type") == "json_object": + # Fallback for json_object type + json_schema = "{}" + + response_text = await asyncio.to_thread( + runner.generate_batch, + prompt=prompt, + max_tokens=runner.get_effective_max_tokens( + request.max_tokens or _default_max_tokens, interactive=False + ), + temperature=request.temperature, + top_p=request.top_p, + repetition_penalty=request.repetition_penalty, + use_chat_template=False, + json_schema=json_schema, + ) + + # Handle stop sequences if provided + if request.stop: + stop_sequences = ( + request.stop if isinstance(request.stop, list) else [request.stop] + ) + min_index = len(response_text) + found_stop = False + for stop 
in stop_sequences: + index = response_text.find(stop) + if index != -1: + min_index = min(min_index, index) + found_stop = True + + if found_stop: + response_text = response_text[:min_index] + + prompt_tokens = count_tokens(prompt) + completion_tokens = count_tokens(response_text) + + return { + "id": completion_id, + "object": "chat.completion", + "created": created, + "model": request.model, + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": response_text}, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + }, + } + def format_chat_messages_for_runner( messages: List[ChatMessage], ) -> List[Dict[str, str]]: diff --git a/server/backend/mlx_engine/__init__.py b/server/backend/mlx_engine/__init__.py new file mode 100644 index 0000000..3f29aaf --- /dev/null +++ b/server/backend/mlx_engine/__init__.py @@ -0,0 +1,52 @@ +""" +`mlx_engine` is LM Studio's LLM inferencing engine for Apple MLX +""" + +__all__ = [ + "load_model", + "load_draft_model", + "is_draft_model_compatible", + "unload_draft_model", + "create_generator", + "tokenize", +] + +from pathlib import Path +import os + +from .utils.disable_hf_download import patch_huggingface_hub +from .utils.register_models import register_models +from .utils.logger import setup_logging + + +from .generate import ( + load_model, + load_draft_model, + is_draft_model_compatible, + unload_draft_model, + create_generator, + tokenize, +) + +patch_huggingface_hub() +register_models() +setup_logging() + + +def _set_outlines_cache_dir(cache_dir: Path | str): + """ + Set the cache dir for Outlines. 
+ + Outlines reads the OUTLINES_CACHE_DIR environment variable to + determine where to read/write its cache files + """ + if "OUTLINES_CACHE_DIR" in os.environ: + return + + cache_dir = Path(cache_dir).expanduser().resolve() + os.environ["OUTLINES_CACHE_DIR"] = str(cache_dir) + + +_set_outlines_cache_dir( + os.getenv("TILES_OUTLINES_CACHE", "~/.cache/tiles/.internal/outlines") +) diff --git a/server/backend/mlx_engine/cache_wrapper.py b/server/backend/mlx_engine/cache_wrapper.py new file mode 100644 index 0000000..9667395 --- /dev/null +++ b/server/backend/mlx_engine/cache_wrapper.py @@ -0,0 +1,334 @@ +from typing import Callable, List, Optional, Any +import logging +from mlx_lm.models.cache import ( + make_prompt_cache, + trim_prompt_cache, + can_trim_prompt_cache, +) +from mlx_lm.generate import generation_stream, maybe_quantize_kv_cache +import mlx.core as mx +import mlx.nn as nn +import sys + + +PROMPT_PROCESSING_CHUNK_SIZE = 512 + +logger = logging.getLogger(__name__) + + +class StopPromptProcessing(Exception): + """ + Exception to signal that the user aborted generation during prompt processing. + """ + + +class CacheWrapper: + """ + Wrapper class for the MLX LM cache to maintain an in-memory cache + """ + + def __init__( + self, + model: nn.Module, + max_kv_size: Optional[int], + *, + verbose: bool = False, + kv_bits: Optional[int] = None, + kv_group_size: Optional[int] = None, + quantized_kv_start: Optional[int] = None, + chunk_size: int = PROMPT_PROCESSING_CHUNK_SIZE, + ): + """ + Initialize the CacheWrapper. + + Args: + model (nn.Module): The model to be cached. + max_kv_size (Optional[int]): Maximum size of the key-value cache. 
+ """ + # utilize a simple ordered list of tokens processed so far for cache invalidation checking + self.tokens: Optional[mx.array] = None + self.cache: List[Any] = make_prompt_cache(model, max_kv_size) + self.model = model + self.draft_model: Optional[nn.Module] = None + self.max_kv_size = max_kv_size + self.verbose = verbose + self.kv_cache_qtn_params = dict( + kv_bits=kv_bits, + kv_group_size=kv_group_size, + quantized_kv_start=quantized_kv_start, + ) + self.chunk_size = chunk_size + + def _get_num_tokens_in_cache(self) -> int | None: + """ + Get the number of tokens in the cache. + + Returns: + int | None: The number of tokens in the cache, or None if the size cannot be determined. + """ + for c in self.cache: + if hasattr(c, "offset"): + return c.offset + return None + + @staticmethod + def _find_common_prefix( + current_tokens: mx.array, prompt_tokens: mx.array, num_tokens_to_exclude: int + ) -> int: + """ + Determine the common prefix length between the current tokens and the prompt tokens. + + Args: + current_tokens (mx.array): The cached tokens (self.tokens). + prompt_tokens (mx.array): The prompt tokens. + num_tokens_to_exclude (int): The minimum length of the remaining prompt tokens array. + + Returns: + int: The length of the common prefix. 
+ """ + prompt_tokens = prompt_tokens + current_tokens = current_tokens + # Find the minimum length between the two arrays + min_length = min(len(current_tokens), len(prompt_tokens)) + + # Compare elements up to the minimum length + mask = prompt_tokens[:min_length] == current_tokens[:min_length] + + # Find the index where the first mismatch occurs + if mx.any(mask == False): # noqa E712 + common_length = int(mx.argmax(mask == False)) # noqa E712 + else: + common_length = int(min_length) + + # Ensure that the prompt is at least num_tokens_to_exclude long + uncached_prompt_tokens_length = len(prompt_tokens[common_length:]) + length_adjustment = max( + 0, num_tokens_to_exclude - uncached_prompt_tokens_length + ) + common_length = max(common_length - length_adjustment, 0) + return common_length + + def _get_unprocessed_tokens( + self, prompt_tokens: mx.array, num_tokens_to_exclude: int + ): + """ + Get the unprocessed tokens from the prompt. + + Args: + prompt_tokens (mx.array): The prompt tokens. + num_tokens_to_exclude (int): The number of tokens that should not be added to the cache. + + Returns: + mx.array: The unprocessed tokens. + """ + if self.tokens is None: + self.tokens = prompt_tokens + return self.tokens + + # Find common KV between the last generation and the current prompt + common_prefix = self._find_common_prefix( + self.tokens, prompt_tokens, num_tokens_to_exclude + ) + + # Trim the cache if the common prefix is shorter than the current cache + num_tokens_in_cache = self._get_num_tokens_in_cache() + if num_tokens_in_cache is None: + logger.warning( + "Could not determine the number of tokens in the cache, clearing the cache." 
+ ) + self.cache = make_prompt_cache(self.model, self.max_kv_size) + self.tokens = prompt_tokens + return self.tokens + num_tokens_to_trim = num_tokens_in_cache - common_prefix + if num_tokens_to_trim > 0: + if not can_trim_prompt_cache(self.cache): + logger.warning( + f"Tried to trim '{num_tokens_to_trim}' tokens from the prompt cache, but could not: Cache is not trimmable. Clearing the cache instead." + ) + self.cache = make_prompt_cache(self.model, self.max_kv_size) + self.tokens = prompt_tokens + return self.tokens + tokens_trimmed = trim_prompt_cache(self.cache, num_tokens_to_trim) + if tokens_trimmed != num_tokens_to_trim: + # If we trimmed fewer tokens than expected, the cache is invalid + logger.error( + f"Tokens trimmed from cache ({tokens_trimmed}) is less than expected ({num_tokens_to_trim}). Clearing the cache." + ) + self.cache = make_prompt_cache(self.model, self.max_kv_size) + self.tokens = prompt_tokens + return self.tokens + logger.info(f"Trimmed {num_tokens_to_trim} tokens from the prompt cache") + + # Keep track of the prompt tokens + self.tokens = prompt_tokens + + if self.verbose: + print(f"Common prefix length: {common_prefix}", file=sys.stderr) + print(f"Trimmed tokens: {num_tokens_to_trim}", file=sys.stderr) + + # All of the common tokens are now in the cache, so we can return the remaining tokens that still need to be processed + return prompt_tokens[common_prefix:] + + def _prefill( + self, + model, + cache, + tokens, + progress_callback: Callable[[float], bool], + start_progress: float, + end_progress: float, + ): + """ + Fill a KV cache for a specific model + + Args: + model: The model to use for cache filling + cache: The cache to fill + tokens: Tokens to process + progress_callback: Callback for reporting progress + start_progress: Starting progress percentage + end_progress: Ending progress percentage + """ + remaining_tokens = tokens + num_processed = 0 + total_tokens = len(tokens) + + while remaining_tokens.size > 0: + 
current_chunk_size = min(self.chunk_size, remaining_tokens.size) + current_chunk = remaining_tokens[:current_chunk_size] + + model(current_chunk[None], cache=cache) + maybe_quantize_kv_cache(prompt_cache=cache, **self.kv_cache_qtn_params) + mx.eval([c.state for c in cache]) + + remaining_tokens = remaining_tokens[current_chunk_size:] + num_processed += current_chunk_size + + # Scale progress to fit between start_progress and end_progress + progress = start_progress + (end_progress - start_progress) * ( + num_processed / total_tokens + ) + mx.clear_cache() + should_continue = progress_callback(progress) + if should_continue is False: # If it's None, assume continue generation + logger.info("Prompt processing was cancelled by the user.") + num_tokens_in_cache = self._get_num_tokens_in_cache() + if num_tokens_in_cache is not None and num_tokens_in_cache > len( + self.tokens + ): + logger.warning( + "The number of tokens in the cache is greater than the number of prompt tokens. This is unexpected. Clearing the cache." + ) + num_tokens_in_cache = None + if num_tokens_in_cache is None: + self.cache = make_prompt_cache(self.model, self.max_kv_size) + self.tokens = None + else: + # Remember which tokens were processed so far, so that we can continue processing at a later point + self.tokens = self.tokens[:num_tokens_in_cache] + raise StopPromptProcessing + + def set_draft_model(self, draft_model: nn.Module): + """ + Sets or updates the draft model to use in the cache. + + If the provided draft_model is already set, returns without changes. + Otherwise, clears existing cache and rebuilds it by combining caches + from the main model and draft model. Requires a main model to be set first. + Args: + draft_model: The draft model to cache. Pass None to remove draft model. + + Raises: + ValueError: If main model hasn't been set yet. 
+ """ + if self.model is None: + raise ValueError("Cannot add a draft model to cache without a main model") + if self.max_kv_size is not None: + logger.info("Disabling max_kv_size when setting a draft model for cache") + self.max_kv_size = None + + if self.draft_model is draft_model: + # Skip if the exact same draft model instance is already in cache + return + + # clear the current cache, append draft model cache to the end of the main model cache as per + # https://github.com/ml-explore/mlx-examples/blob/514502da22f0dc4c1ac439bdf78c07d5ec41acf7/llms/mlx_lm/utils.py#L381-L382 + logger.info("Clearing current prompt cache and adding draft model to the cache") + self.tokens = None + self.cache: List[Any] = make_prompt_cache(self.model) + if draft_model is not None: + self.cache += make_prompt_cache(draft_model) + self.draft_model = draft_model + + def unset_draft_model(self): + """Removes the draft model from the cache if one exists.""" + if self.draft_model is None: + return + self.draft_model = None + self.cache = self.cache[: len(self.model.layers)] + + def update_cache( + self, + prompt_tokens: mx.array, + prompt_progress_callback, + *, + num_tokens_to_exclude: int = 1, + ) -> mx.array: + """ + Set up the KV cache for the next generation. + Re-use as much of the KV cache from the previous generation as possible. + + Args: + prompt_tokens (mx.array): The prompt tokens. + prompt_progress_callback (Callable): A callback function to report prompt processing progress. + num_tokens_to_exclude (int): The number of tokens that should not be added to the cache. + + Returns: + mx.array: The prompt tokens to be used for the next generation. 
+ """ + if prompt_progress_callback is None: + + def prompt_progress_callback(_) -> bool: + return True + + num_tokens_to_exclude = max(num_tokens_to_exclude, 1) + prompt_tokens = self._get_unprocessed_tokens( + prompt_tokens, num_tokens_to_exclude + ) + + # Prefill the cache with the non-excluded prompt tokens + num_tokens_to_exclude = min(num_tokens_to_exclude, len(prompt_tokens)) + prefill_tokens = prompt_tokens[:-num_tokens_to_exclude] + prompt_progress_callback(0) + with mx.stream(generation_stream): + if self.draft_model is not None: + # Fill draft model cache (0% to 50% progress) + draft_cache = self.cache[len(self.model.layers) :] + self._prefill( + model=self.draft_model, + cache=draft_cache, + tokens=prefill_tokens, + progress_callback=prompt_progress_callback, + start_progress=0, + end_progress=50, + ) + # Fill main model cache (50% to 100% progress for draft model, 0% to 100% otherwise) + main_cache = self.cache[: len(self.model.layers)] + self._prefill( + model=self.model, + cache=main_cache, + tokens=prefill_tokens, + progress_callback=prompt_progress_callback, + start_progress=50 if self.draft_model is not None else 0, + end_progress=100, + ) + + # Return the tokens that must still be processed outside of the cache + non_prefill_tokens = prompt_tokens[-num_tokens_to_exclude:] + return non_prefill_tokens + + def record_generated_token(self, token): + """ + Add the generated token to the token list, so that we can map the token to the KV cache. + """ + self.tokens = mx.concat([self.tokens, mx.array([token])]) diff --git a/server/backend/mlx_engine/external/datasets/dill.py b/server/backend/mlx_engine/external/datasets/dill.py new file mode 100644 index 0000000..3068f3e --- /dev/null +++ b/server/backend/mlx_engine/external/datasets/dill.py @@ -0,0 +1,217 @@ +# copied from https://github.com/huggingface/datasets/blob/1e1d313/src/datasets/utils/_dill.py + +# Copyright 2023 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Extends `dill` to support pickling more types and produce more consistent dumps.""" + +import sys +from io import BytesIO +from types import FunctionType +from typing import Any, Dict, List, Union + +import dill +import xxhash + + +class Hasher: + """Hasher that accepts python objects as inputs.""" + + dispatch: Dict = {} + + def __init__(self): + self.m = xxhash.xxh64() + + @classmethod + def hash_bytes(cls, value: Union[bytes, List[bytes]]) -> str: + value = [value] if isinstance(value, bytes) else value + m = xxhash.xxh64() + for x in value: + m.update(x) + return m.hexdigest() + + @classmethod + def hash(cls, value: Any) -> str: + return cls.hash_bytes(dumps(value)) + + def update(self, value: Any) -> None: + header_for_update = f"=={type(value)}==Unit" + value_for_update = self.hash(value) + self.m.update(header_for_update.encode("utf8")) + self.m.update(value_for_update.encode("utf-8")) + + def hexdigest(self) -> str: + return self.m.hexdigest() + + +class Pickler(dill.Pickler): + dispatch = dill._dill.MetaCatchingDict(dill.Pickler.dispatch.copy()) + _legacy_no_dict_keys_sorting = False + + def save(self, obj, save_persistent_id=True): + obj_type = type(obj) + if obj_type not in self.dispatch: + if "regex" in sys.modules: + import regex # type: ignore + + if obj_type is regex.Pattern: + pklregister(obj_type)(_save_regexPattern) + if "spacy" in sys.modules: + import spacy # type: ignore + + if 
issubclass(obj_type, spacy.Language): + pklregister(obj_type)(_save_spacyLanguage) + if "tiktoken" in sys.modules: + import tiktoken # type: ignore + + if obj_type is tiktoken.Encoding: + pklregister(obj_type)(_save_tiktokenEncoding) + if "torch" in sys.modules: + import torch # type: ignore + + if issubclass(obj_type, torch.Tensor): + pklregister(obj_type)(_save_torchTensor) + + if obj_type is torch.Generator: + pklregister(obj_type)(_save_torchGenerator) + + # Unwrap `torch.compile`-ed modules + if issubclass(obj_type, torch.nn.Module): + obj = getattr(obj, "_orig_mod", obj) + if "transformers" in sys.modules: + import transformers # type: ignore + + if issubclass(obj_type, transformers.PreTrainedTokenizerBase): + pklregister(obj_type)(_save_transformersPreTrainedTokenizerBase) + + # Unwrap `torch.compile`-ed functions + if obj_type is FunctionType: + obj = getattr(obj, "_torchdynamo_orig_callable", obj) + dill.Pickler.save(self, obj, save_persistent_id=save_persistent_id) + + def _batch_setitems(self, items): + if self._legacy_no_dict_keys_sorting: + return super()._batch_setitems(items) + # Ignore the order of keys in a dict + try: + # Faster, but fails for unorderable elements + items = sorted(items) + except Exception: # TypeError, decimal.InvalidOperation, etc. 
+ items = sorted(items, key=lambda x: Hasher.hash(x[0])) + dill.Pickler._batch_setitems(self, items) + + def memoize(self, obj): + # Don't memoize strings since two identical strings can have different Python ids + if type(obj) is not str: # noqa: E721 + dill.Pickler.memoize(self, obj) + + +def pklregister(t): + """Register a custom reducer for the type.""" + + def proxy(func): + Pickler.dispatch[t] = func + return func + + return proxy + + +def dump(obj, file): + """Pickle an object to a file.""" + Pickler(file, recurse=True).dump(obj) + + +def dumps(obj): + """Pickle an object to a string.""" + file = BytesIO() + dump(obj, file) + return file.getvalue() + + +def log(pickler, msg): + pass + + +def _save_regexPattern(pickler, obj): + import regex # type: ignore + + log(pickler, f"Re: {obj}") + args = (obj.pattern, obj.flags) + pickler.save_reduce(regex.compile, args, obj=obj) + log(pickler, "# Re") + + +def _save_tiktokenEncoding(pickler, obj): + import tiktoken # type: ignore + + log(pickler, f"Enc: {obj}") + args = (obj.name, obj._pat_str, obj._mergeable_ranks, obj._special_tokens) + pickler.save_reduce(tiktoken.Encoding, args, obj=obj) + log(pickler, "# Enc") + + +def _save_torchTensor(pickler, obj): + import torch # type: ignore + + # `torch.from_numpy` is not picklable in `torch>=1.11.0` + def create_torchTensor(np_array, dtype=None): + tensor = torch.from_numpy(np_array) + if dtype: + tensor = tensor.type(dtype) + return tensor + + log(pickler, f"To: {obj}") + if obj.dtype == torch.bfloat16: + args = (obj.detach().to(torch.float).cpu().numpy(), torch.bfloat16) + else: + args = (obj.detach().cpu().numpy(),) + pickler.save_reduce(create_torchTensor, args, obj=obj) + log(pickler, "# To") + + +def _save_torchGenerator(pickler, obj): + import torch # type: ignore + + def create_torchGenerator(state): + generator = torch.Generator() + generator.set_state(state) + return generator + + log(pickler, f"Ge: {obj}") + args = (obj.get_state(),) + 
pickler.save_reduce(create_torchGenerator, args, obj=obj) + log(pickler, "# Ge") + + +def _save_spacyLanguage(pickler, obj): + import spacy # type: ignore + + def create_spacyLanguage(config, bytes): + lang_cls = spacy.util.get_lang_class(config["nlp"]["lang"]) + lang_inst = lang_cls.from_config(config) + return lang_inst.from_bytes(bytes) + + log(pickler, f"Sp: {obj}") + args = (obj.config, obj.to_bytes()) + pickler.save_reduce(create_spacyLanguage, args, obj=obj) + log(pickler, "# Sp") + + +def _save_transformersPreTrainedTokenizerBase(pickler, obj): + log(pickler, f"Tok: {obj}") + # Ignore the `cache` attribute + state = obj.__dict__.copy() + if "cache" in state and isinstance(state["cache"], dict): + state["cache"] = {} + pickler.save_reduce(type(obj), (), state=state, obj=obj) + log(pickler, "# Tok") diff --git a/server/backend/mlx_engine/generate.py b/server/backend/mlx_engine/generate.py new file mode 100644 index 0000000..7e3e72c --- /dev/null +++ b/server/backend/mlx_engine/generate.py @@ -0,0 +1,490 @@ +from typing import Callable, Iterator, List, Literal, NamedTuple, Optional +import json +import logging +from pathlib import Path +import sys + +from mlx_lm.generate import stream_generate +from mlx_lm.sample_utils import make_sampler + +from .model_kit.model_kit import ModelKit +from .vision_model_kit.vision_model_kit import VisionModelKit +from .processors.repetition_penalty_processor import ( + RepetitionPenaltyProcessor, +) +from .utils.token import Token +from .utils.eot_tokens import sanitize_eos_tokens +from .utils.top_logprobs import summarize_top_logprobs +from .stop_string_processor import ( + StopStringProcessor, + StopStringProcessorResult, +) +from .utils.set_seed import set_seed +from .utils.speculative_decoding import ( + determine_draft_model_for_generation, + configure_num_draft_tokens_in_generate_args, +) +from outlines.processors.structured import JSONLogitsProcessor +from .utils.outlines_transformer_tokenizer import 
OutlinesTransformerTokenizer +from .cache_wrapper import StopPromptProcessing, PROMPT_PROCESSING_CHUNK_SIZE +from .utils.progress_decorators import ratchet, throw_to_stop, token_count + +MAX_TOP_LOGPROBS = 10 + +StopReason = Literal["eos_token", "stop_string", "user_cancelled"] + +logger = logging.getLogger(__name__) + + +class GenerationStopCondition(NamedTuple): + stop_reason: StopReason + stop_string: str + # sequence of token ids that the stop string was found in + stop_tokens: List[int] + + +class GenerationResult(NamedTuple): + text: str + tokens: List[Token] + top_logprobs: List[List[Token]] + stop_condition: Optional[GenerationStopCondition] + + +def construct_user_cancelled_result(): + return GenerationResult( + text="", + tokens=[], + top_logprobs=[], + stop_condition=GenerationStopCondition( + stop_reason="user_cancelled", + stop_string="", + stop_tokens=[], + ), + ) + + +def load_model( + model_path: str | Path, + *, + vocab_only: bool = False, + max_kv_size: Optional[int] = 4096, + trust_remote_code: bool = False, + kv_bits: Optional[int] = None, + kv_group_size: Optional[int] = None, + quantized_kv_start: Optional[int] = None, + adapter_path: Optional[str | Path] = None, +) -> ModelKit | VisionModelKit: + """ + Load a language model or vision-language model from the specified path. + + This function determines the model type based on the config.json file in the model directory + and initializes either a standard language model or a vision-language model accordingly. + + Args: + model_path (str | Path): Path to the model directory containing model files and config.json. + vocab_only (bool): Only load vocabulary/tokenizer, not the full model. + max_kv_size (int): Maximum size of the key-value cache used during model inference. + trust_remote_code (bool): Whether to allow loading of remote code during model initialization. + kv_bits (Optional[int]): Number of bits for KV cache quantization. 
+ kv_group_size (Optional[int]): Group size for KV cache quantization. + quantized_kv_start (Optional[int]): Step to begin KV cache quantization when enabled. + adapter_path (Optional[str | Path]): Path to LoRA adapter weights. + + Returns: + ModelKit | VisionModelKit: An initialized model instance: + - ModelKit: for text-only models and vision models with vision add-on support + - VisionModelKit: for vision models that are not yet supported by ModelKit + + Raises: + FileNotFoundError: If config.json is not found in the specified model path + json.JSONDecodeError: If config.json exists but contains invalid JSON + ValueError: If the model configuration is invalid or unsupported + """ + model_path = Path(model_path) + config_json = json.loads((model_path / "config.json").read_text()) + model_type = config_json.get("model_type", None) + + # only use VisionModelKit if ModelKit doesn't have vision support for this model + if "vision_config" in config_json and not ModelKit.is_supported_vision_arch( + model_type + ): + if any([kv_bits, kv_group_size, quantized_kv_start]): + raise ValueError( + "MLX vision models do not currently support KV cache quantization" + ) + if adapter_path: + raise ValueError("MLX vision models do not currently support LoRA adapters") + model_kit = VisionModelKit(model_path, vocab_only, trust_remote_code) + else: + model_kit = ModelKit( + model_path, + vocab_only, + max_kv_size, + kv_bits=kv_bits, + kv_group_size=kv_group_size, + quantized_kv_start=quantized_kv_start, + adapter_path=adapter_path, + ) + sanitize_eos_tokens(model_kit) + return model_kit + + +def load_draft_model(model_kit: ModelKit | VisionModelKit, path: str | Path) -> None: + model_kit.load_draft_model(path) + + +def is_draft_model_compatible( + model_kit: ModelKit | VisionModelKit, path: str | Path +) -> bool: + return model_kit.is_draft_model_compatible(path) + + +def unload_draft_model(model_kit: ModelKit | VisionModelKit) -> None: + model_kit.unload_draft_model() + + +def 
create_generator( + model_kit: ModelKit | VisionModelKit, + prompt_tokens: List[int], + *, + prompt_progress_callback: Optional[Callable[[float], bool]] = None, + images_b64: Optional[List[str]] = None, + max_image_size: Optional[tuple[int, int]] = None, + stop_strings: Optional[List[str]] = None, + top_logprobs: Optional[int] = None, + repetition_penalty: Optional[float] = None, + repetition_context_size: Optional[int] = 20, + temp: Optional[float] = None, + top_p: Optional[float] = None, + top_k: Optional[int] = None, + min_p: Optional[float] = None, + min_tokens_to_keep: Optional[int] = None, + seed: Optional[int] = None, + json_schema: Optional[str] = None, + max_tokens: Optional[int] = 10000000, + speculative_decoding_toggle: Optional[bool] = None, + num_draft_tokens: Optional[int] = None, +) -> Iterator[GenerationResult]: + """ + Create a generator that streams text generation results from the model. + + This function sets up and manages the text generation process, handling various generation + parameters, processing callbacks, and managing generation constraints. It supports both + standard language models and vision-language models. + + Args: + model_kit (ModelKit | VisionModelKit): The initialized model to use for generation + prompt_tokens (List[int]): List of token IDs representing the input prompt + prompt_progress_callback (Optional[Callable[[float], bool]]): Callback function that receives + generation progress as a float between 0 and 100. Callback should return True to continue + prompt processing, or False to stop generation + images_b64 (Optional[List[str]]): List of base64-encoded images for vision-language models + max_image_size (Optional[tuple[int, int]]): Maximum dimensions (width, height) for images. + Images will be resized to fit within these dimensions while maintaining aspect ratio if + they exceed this size. If None, no resizing. 
+ stop_strings (Optional[List[str]]): List of strings that will trigger generation to stop + when encountered + top_logprobs (Optional[int]): Number of top token probabilities to return per token + Must be <= MAX_TOP_LOGPROBS + repetition_penalty (Optional[float]): Penalty factor for repeated tokens. Higher values + discourage repetition + repetition_context_size (Optional[int]): Number of previous tokens to consider for + repetition penalty. Defaults to 20 + temp (Optional[float]): Temperature for sampling. Higher values increase randomness + top_p (Optional[float]): Top-p (nucleus) sampling parameter + top_k (Optional[int]): Top-k sampling parameter + min_p (Optional[float]): Minimum probability threshold for token sampling + min_tokens_to_keep (Optional[int]): Minimum number of tokens to keep during sampling + seed (Optional[int]): Random seed for reproducible generation + json_schema (Optional[str]): JSON schema for structured output generation + max_tokens (Optional[int]): Maximum number of tokens to generate. Defaults to 10000000 + speculative_decoding_toggle (Optional[bool]): If not set, use speculative decoding + if a draft model is loaded. If set to true, draft model must be loaded or else error. + If set to false, speculative decoding is disabled even if a draft model is loaded. 
+ num_draft_tokens (Optional[int]): Number of tokens to draft when using speculative decoding + + Yields: + GenerationResult: A named tuple containing: + - text (str): Generated text segment + - tokens (List[TokenLogprob]): List of generated tokens with their probabilities + - top_logprobs (List[List[TokenLogprob]]): Token probability information if requested + - stop_condition (Optional[GenerationStopCondition]): Information about why + generation stopped, if applicable + + Raises: + ValueError: If top_logprobs exceeds MAX_TOP_LOGPROBS or if any parameters are invalid + """ + set_seed(seed) + + generate_args = {} + # For each call to create_generator, wrap all prompt progress calls with a ratchet that + # ensures reported progress monotonically increases. This is needed because prompt processing + # occurs in different places depending on the model type and prompt content. The prompt will only + # be processed once, but some contexts are not aware that the prompt is already processed, which + # can cause the progress to look like it is being reset when it is actually already complete. + # See https://github.com/lmstudio-ai/mlx-engine/issues/226. 
+ prompt_progress_callback = ratchet(prompt_progress_callback) + + # Set up kv cache + if type(model_kit) is not VisionModelKit: + for attr in ["max_kv_size", "kv_bits", "kv_group_size", "quantized_kv_start"]: + value = getattr(model_kit, attr, None) + if value is not None: + generate_args[attr] = value + + # Set up repetition penalty + repetition_penalty_kwargs = {} + if repetition_penalty is not None: + repetition_penalty_kwargs["repetition_penalty"] = repetition_penalty + if repetition_context_size is not None: + repetition_penalty_kwargs["repetition_context_size"] = ( + repetition_context_size + ) + + # Set up speculative decoding + draft_model = determine_draft_model_for_generation( + model_kit, speculative_decoding_toggle + ) + configure_num_draft_tokens_in_generate_args( + model_kit, draft_model, num_draft_tokens, generate_args + ) + + # Process prompt + try: + input_tokens, input_embeddings = model_kit.process_prompt( + prompt_tokens, + images_b64, + prompt_progress_callback, + generate_args, + max_image_size, + speculative_decoding_toggle, + ) + except StopPromptProcessing: + yield construct_user_cancelled_result() + return + if draft_model is None: + # input embeddings not yet supported for speculative decoding in mlx-lm + generate_args["input_embeddings"] = input_embeddings + + # Setup logits processors + logits_processors = [] + if repetition_penalty and repetition_penalty != 0.0: + cached_tokens = ( + prompt_tokens[: -len(input_tokens)] + if len(input_tokens) > 0 + else prompt_tokens + ) + logits_processors.append( + RepetitionPenaltyProcessor( + token_history=cached_tokens, **repetition_penalty_kwargs + ) + ) + + # Set up sampler + generate_args["sampler"] = make_sampler( + **{ + k: v + for k, v in { + "temp": temp, + "top_p": top_p, + "min_p": min_p, + "min_tokens_to_keep": min_tokens_to_keep, + "top_k": top_k, + }.items() + if v is not None + } + ) + + # If using VisionModelKit, immediately record the token once it's sampled + if type(model_kit) is 
VisionModelKit: + sampler_func = generate_args["sampler"] + + def sampler_func_wrapper(*args, **kwargs): + token = sampler_func(*args, **kwargs) + model_kit.record_sampled_token(token) + return token + + generate_args["sampler"] = sampler_func_wrapper + + # Validate top_logprobs + if top_logprobs is None: + top_logprobs = 0 + if top_logprobs > MAX_TOP_LOGPROBS: + raise ValueError( + f"top_logprobs must be less than or equal to {MAX_TOP_LOGPROBS}" + ) + + # Keep track of tokens buffered by detokenizer to yield accurate generation results + token_buffer: List[Token] = [] + top_logprobs_buffer: List[List[Token]] = [] + + tokenizer = model_kit.tokenizer + + # Add outlines logits processor if json_schema is provided + is_structured_output_request = json_schema is not None + if is_structured_output_request: + logits_processors.append( + JSONLogitsProcessor( + json_schema, + OutlinesTransformerTokenizer(model_kit.tokenizer._tokenizer), + tensor_library_name="mlx", + ) + ) + + # Set up stop string processor if non-empty stop_strings are provided + stop_string_processor = None + if stop_strings is not None and len(stop_strings) > 0: + stop_string_processor = StopStringProcessor(stop_strings, tokenizer) + text = "" + + def _handle_stop_string_detected( + tokenizer, + stop_string_processor_result: StopStringProcessorResult, + text: str, + token_buffer: List[Token], + top_logprobs_buffer: List[List[Token]], + ) -> GenerationResult: + """ + Helper method to Handle completion of text generation when a stop string is + encountered. 
+ + Args: + tokenizer: The tokenizer instance + stop_string_processor_result: Result from stop string processor + text: Current generated text + token_buffer: Buffer of generated tokens + top_logprobs_buffer: Buffer of token probabilities + + Returns: + GenerationResult: Final generation result including stop condition + """ + # Finalize detokenizer to get remaining text + detokenizer = tokenizer.detokenizer + detokenizer.finalize() + text += detokenizer.last_segment + + # Process stop string by trimming text segment where it begins + stop_string = stop_string_processor_result.stop_string + stop_string_start_pos = text.find(stop_string) + + if stop_string_start_pos != -1: + text = text[:stop_string_start_pos] + else: + # this is known to happen when the eos token is a stop string + sys.stderr.write( + f"[mlx-engine] Stop string '{stop_string}' not found in final text segment, " + "even though a full stop was detected. Not trimming final segment." + ) + + stop_condition = GenerationStopCondition( + stop_reason="stop_string", + stop_string=stop_string, + stop_tokens=stop_string_processor_result.stop_tokens, + ) + + return GenerationResult( + text=text, + tokens=token_buffer, + stop_condition=stop_condition, + top_logprobs=top_logprobs_buffer, + ) + + stream = stream_generate( + model=model_kit.model, + tokenizer=tokenizer, + draft_model=draft_model, + prompt=input_tokens, + max_tokens=max_tokens, + logits_processors=logits_processors, + prompt_progress_callback=token_count(throw_to_stop(prompt_progress_callback)), + prefill_step_size=PROMPT_PROCESSING_CHUNK_SIZE, + **generate_args, + ) + + while True: + try: + generation_result = next(stream) + except StopIteration: + break + except StopPromptProcessing: + yield construct_user_cancelled_result() + return + + # Token processor + token = generation_result.token + text += generation_result.text + # record generated token to cache, if cache is active + if model_kit.is_cross_prompt_cache_active(): + 
def tokenize(model_kit: ModelKit | VisionModelKit, prompt: str) -> List[int]:
    """Tokenize *prompt* with the model kit's own tokenizer.

    Thin convenience wrapper: all work is delegated to ``model_kit.tokenize``.

    Args:
        model_kit: Loaded model kit (text-only or vision) that provides a tokenizer.
        prompt: Raw prompt text to convert.

    Returns:
        Token IDs ready to feed to the model.
    """
    return model_kit.tokenize(prompt)
class ModelKit:
    """
    Collection of objects and methods that are needed for operating a model.

    Args:
        model_path (Path): Path to the model directory containing model files.
        vocab_only (bool): Only load vocabulary/tokenizer, not the full model.
        max_kv_size (int): Maximum size of the key-value cache used during model inference.
        kv_bits (Optional[int]): Number of bits for KV cache quantization. None disables quantization.
        kv_group_size (Optional[int]): Group size for KV cache quantization. Defaults to 64.
        quantized_kv_start (Optional[int]): Step to begin KV cache quantization when enabled. Defaults to 0.
    """

    # Maps config.json "model_type" to the vision add-on that supports it.
    VISION_ADD_ON_MAP = {
        "gemma3": Gemma3VisionAddOn,
        "gemma3n": Gemma3nVisionAddOn,
        "mistral3": Mistral3VisionAddOn,
        "pixtral": PixtralVisionAddOn,
    }

    # Model state tracking. These are class-level defaults that get shadowed by
    # instance attributes during init/load. FIX: the original annotations claimed
    # non-Optional types (e.g. `model: nn.Module = None`) while defaulting to
    # None; they are Optional until a load method has run.
    model: Optional[nn.Module] = None
    tokenizer: Optional[TokenizerWrapper] = None
    detokenizer: Optional[StreamingDetokenizer] = None
    cache_wrapper: Optional[CacheWrapper] = None
    _cross_prompt_cache_active: bool = False
    max_kv_size: Optional[int] = None
    kv_bits: Optional[int] = None
    kv_group_size: Optional[int] = None
    quantized_kv_start: Optional[int] = None
    draft_model: Optional[nn.Module] = None
    model_type: Optional[str] = None
    adapter_path: Optional[Path] = None

    # Multi-modal add-on; populated only when the architecture has vision support
    # and the checkpoint ships a "vision_config".
    vision_add_on: Optional[BaseVisionAddOn] = None

    def _vocab_only_init(self, model_path: Path):
        """Load only the tokenizer/detokenizer (no weights)."""
        logger.info(f"Loading model (vocab-only) from {model_path}...")
        self.tokenizer = mlx_lm.tokenizer_utils.load(model_path)
        self.detokenizer = self.tokenizer.detokenizer
        logger.info("Model (vocab-only) loaded successfully")

    def _full_model_init(
        self,
        model_path: Path,
        max_kv_size: Optional[int] = None,
        kv_bits: Optional[int] = None,
        kv_group_size: Optional[int] = None,
        quantized_kv_start: Optional[int] = None,
        adapter_path: Optional[str | Path] = None,
    ):
        """Load weights, tokenizer, KV cache wrapper, and optional vision add-on."""
        kv_bits, kv_group_size, quantized_kv_start = get_kv_cache_quantization_params(
            kv_bits,
            kv_group_size,
            quantized_kv_start,
        )
        if kv_bits and max_kv_size is not None:
            # Quantized KV cache is only supported for non-rotating KV cache
            logger.warning("max_kv_size is ignored when using KV cache quantization")
            max_kv_size = None
        self.model_path = model_path
        self.adapter_path = Path(adapter_path) if adapter_path else None
        logger.info(f"Loading model from {model_path}...")
        config_json = json.loads((model_path / "config.json").read_text())
        self.model_type = config_json.get("model_type", None)

        self.model, self.tokenizer = mlx_lm.utils.load(
            self.model_path, adapter_path=self.adapter_path
        )
        fix_mistral_pre_tokenizer(
            tokenizer=self.tokenizer, model_path=model_path, model_type=self.model_type
        )
        self.detokenizer = self.tokenizer.detokenizer
        self.cache_wrapper = CacheWrapper(
            self.model,
            max_kv_size,
            kv_bits=kv_bits,
            kv_group_size=kv_group_size,
            quantized_kv_start=quantized_kv_start,
        )
        self.kv_bits = kv_bits
        self.kv_group_size = kv_group_size
        self.quantized_kv_start = quantized_kv_start
        # Only attach a vision add-on when both the architecture is supported
        # and the checkpoint actually ships vision weights/config.
        vision_add_on_class = self.VISION_ADD_ON_MAP.get(self.model_type)
        should_load_vision_add_on = (
            vision_add_on_class is not None and "vision_config" in config_json
        )
        if should_load_vision_add_on:
            self.vision_add_on = vision_add_on_class(model_path)
        logger.info("Model loaded successfully")

    def __init__(
        self,
        model_path: Path,
        vocab_only: bool = False,
        max_kv_size: Optional[int] = None,
        kv_bits: Optional[int] = None,
        kv_group_size: Optional[int] = None,
        quantized_kv_start: Optional[int] = None,
        adapter_path: Optional[str | Path] = None,
    ):
        if vocab_only:
            self._vocab_only_init(model_path)
        else:
            self._full_model_init(
                model_path,
                max_kv_size,
                kv_bits,
                kv_group_size,
                quantized_kv_start,
                adapter_path=adapter_path,
            )

    def tokenize(self, prompt: str) -> List[int]:
        """Tokenize *prompt*; always returns a list (single-token results included)."""
        ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(prompt))
        if isinstance(ids, int):
            return [ids]
        return ids

    def process_prompt(
        self,
        prompt_tokens,
        images_b64: Optional[List[str]],
        prompt_progress_callback: Optional[Callable[[float], bool]],
        generate_args: dict,
        max_image_size: tuple[int, int] | None,
        speculative_decoding_toggle: Optional[bool] = None,
    ) -> Tuple[mx.array, Optional[mx.array]]:
        """Prepare the prompt for generation.

        Returns a tuple of (input token ids, optional input embeddings).
        Text-only prompts go through the KV-cache-aware text path (embeddings
        None); prompts with images go through the vision add-on.
        """
        ### TEXT-ONLY PROCESS_PROMPT ###
        is_text_only_processing = images_b64 is None or len(images_b64) == 0
        if is_text_only_processing:
            self._cross_prompt_cache_active = True
            if len(prompt_tokens) == 0:
                logger.warning(
                    "Received empty prompt. Generation quality will likely be poor"
                )
                # Models expect some sort of input, so add whitespace
                prompt_tokens = self.tokenize(" ")
            return process_prompt_text_only(
                mx.array(prompt_tokens),
                self.cache_wrapper,
                generate_args,
                self.draft_model,
                speculative_decoding_toggle,
                prompt_progress_callback,
            ), None
        ### WITH IMAGES PROMPT PROCESSING ###
        if self.vision_add_on is None:
            raise ValueError(
                "Vision add-on is not loaded, but images were provided for processing"
            )
        # Cross-prompt caching is disabled for multimodal prompts.
        self._cross_prompt_cache_active = False
        input_ids, embeddings = self.vision_add_on.compute_embeddings(
            self.model, prompt_tokens, images_b64, max_size=max_image_size
        )
        return input_ids, embeddings

    def is_cross_prompt_cache_active(self) -> bool:
        """
        Check if cross-prompt caching is currently enabled.
        Can be overridden by subclasses for custom behavior.
        """
        return self._cross_prompt_cache_active

    def record_token_to_cache(self, token: int) -> None:
        """Append a generated token to the KV cache wrapper's history."""
        self.cache_wrapper.record_generated_token(token)

    @staticmethod
    def is_supported_vision_arch(model_arch: str) -> bool:
        """
        Determines if the specified model architecture has vision support.

        Args:
            model_arch (str): The model architecture identifier to check

        Returns:
            bool: True if vision is supported, False otherwise
        """
        return model_arch in ModelKit.VISION_ADD_ON_MAP

    def is_draft_model_compatible(self, path: str | Path) -> bool:
        """Return True when the draft model at *path* can pair with the main model.

        Compatibility is judged by tokenizer vocab size only; vision models are
        always rejected.
        """
        path = Path(path)
        if self.tokenizer is None:
            logger.warning(
                "Draft model compatibility check requires at least a vocab-only loaded main model"
            )
            return False
        if self.vision_add_on is not None:
            logger.warning("Draft models are currently unsupported for vision models")
            return False
        draft_tokenizer = mlx_lm.tokenizer_utils.load(path)
        if draft_tokenizer.vocab_size != self.tokenizer.vocab_size:
            return False
        return True

    def load_draft_model(self, path: str | Path) -> None:
        """Load a draft model for speculative decoding; raises on incompatibility."""
        logger.info(f"Loading draft model from {path}...")
        path = Path(path)
        if self.model is None:
            raise ValueError("Main model must be loaded before loading a draft model")
        if not self.is_draft_model_compatible(path):
            raise ValueError("Draft model is not compatible with main model")
        self.draft_model, _ = mlx_lm.utils.load(path)
        self.cache_wrapper.set_draft_model(self.draft_model)
        logger.info("Draft model loaded")

    def unload_draft_model(self) -> None:
        """Drop the draft model (no-op with a log message if none is loaded)."""
        if self.draft_model is None:
            logger.info("No loaded draft model to unload")
        else:
            self.draft_model = None
            self.cache_wrapper.unset_draft_model()
        # Noticed that draft model memory would not be released without clearing metal cache
        mx.clear_cache()
class CompatibleTextConfig(TextConfig):
    """
    TextConfig that tolerates ``intermediate_size`` given as list or integer.

    mlx-vlm's conversion (transformers under the hood) rewrites
    ``text_config.intermediate_size`` from a single integer into a list with one
    entry per layer, while mlx-lm's loader expects a single integer. This class
    accepts both shapes by collapsing a list to its first element.
    """

    @classmethod
    def from_dict(cls, params):
        # Keep only keys that the dataclass constructor actually accepts.
        accepted = inspect.signature(cls).parameters
        kwargs = {name: value for name, value in params.items() if name in accepted}
        size = kwargs.get("intermediate_size")
        if isinstance(size, list):
            if size:
                kwargs["intermediate_size"] = size[0]
            else:
                # Empty list carries no information — fall back to the default.
                del kwargs["intermediate_size"]
        return cls(**kwargs)


class CompatibleModel(Model):
    """
    Model that accepts mlx-vlm style weight key ordering.

    mlx-vlm's conversion renames weight keys relative to the original
    huggingface layout — e.g. ``model.language_model.embed_tokens.weight``
    becomes ``language_model.model.embed_tokens.weight`` — while mlx-lm expects
    the huggingface ordering. This class normalizes either layout before
    delegating to the stock sanitizer.
    """

    def sanitize(self, weights):
        tree = tree_unflatten(list(weights.items()))
        nested = tree.get("language_model", {}).get("model", None)
        if nested is not None:
            # mlx-vlm layout detected: re-nest under the huggingface ordering.
            tree = {"model": {"language_model": nested}}
        return super().sanitize(dict(tree_flatten(tree)))


def apply_patches():
    """
    Apply gemma3n compatibility patches by replacing classes in the mlx_lm module.
    """
    import mlx_lm.models.gemma3n as gemma3n_module

    gemma3n_module.Model = CompatibleModel
    gemma3n_module.TextConfig = CompatibleTextConfig
class BaseVisionAddOn(ABC):
    """
    Abstract interface that every VisionAddOn implementation must satisfy.
    """

    @abstractmethod
    def __init__(self):
        """
        Subclasses load their vision model components here.
        """

    @abstractmethod
    def compute_embeddings(
        self,
        text_model: nn.Module,
        prompt_tokens: mx.array,
        images_b64: list[str],
        max_size: tuple[int, int] | None,
    ) -> tuple[mx.array, mx.array]:
        """
        Returns input ids and input embeddings for the language model after text/image merging of the prompt.

        Args:
            text_model: Text model for embedding tokens
            prompt_tokens: Input prompt tokens
            images_b64: List of base64-encoded images
            max_size: Maximum image size as (width, height) tuple. If None, no resizing.
        """


class Gemma3VisionAddOn(BaseVisionAddOn):
    """
    Vision add-on for Gemma3 model. Uses mlx-vlm vision components of Gemma3.
    """

    def __init__(self, model_path: Path):
        """Load the Gemma3 vision tower, projector, config and processor from *model_path*."""
        super().__init__()

        loaded = load_vision_addon(
            model_path=model_path,
            model_config_class=Gemma3ModelConfig,
            vision_config_class=Gemma3VisionConfig,
            text_config_class=Gemma3TextConfig,
            vision_tower_class=Gemma3VisionTower,
            multi_modal_projector_class=Gemma3MultiModalProjector,
            logger=logger,
        )
        self.vision_tower, self.multi_modal_projector, self.config, self.processor = loaded

    def compute_embeddings(
        self,
        text_model: nn.Module,
        prompt_tokens: mx.array,
        images_b64: list[str],
        max_size: tuple[int, int] | None,
    ) -> tuple[mx.array, mx.array]:
        """Compute input_ids and embeddings for text with images."""
        input_ids, pixel_values, attention_mask, other_model_inputs = (
            common_process_prompt_with_images(
                prompt_tokens=prompt_tokens,
                images_b64=images_b64,
                processor=self.processor,
                config=self.config,
                max_size=max_size,
            )
        )
        input_embeddings = text_model.language_model.model.embed_tokens(input_ids)

        # Run the vision tower; pixel values arrive NCHW and are converted to
        # NHWC and matched to the embedding dtype first.
        vision_hidden, _, _ = self.vision_tower(
            pixel_values.transpose(0, 2, 3, 1).astype(input_embeddings.dtype),
            output_hidden_states=True,
        )

        # Project vision features into the language model's embedding space.
        image_features = self.multi_modal_projector(
            vision_hidden.astype(pixel_values.dtype)
        )

        # Interleave image and text embeddings using mlx-vlm's combiner.
        final_inputs_embeds, _ = Gemma3CombinedModel.prepare_inputs_for_multimodal(
            self.config.hidden_size,
            self.config.pad_token_id,
            self.config.image_token_index,
            image_features,
            input_embeddings,
            input_ids,
            attention_mask,
        )
        # remove batch dimension
        return input_ids.squeeze(0), final_inputs_embeds.squeeze(0)
class Gemma3nVisionComponents(nn.Module):
    """Container bundling the Gemma3n vision tower with its multimodal embedder."""

    def __init__(self, vision_tower: nn.Module, embed_vision: nn.Module):
        super().__init__()
        self.vision_tower = vision_tower
        self.embed_vision = embed_vision


class Gemma3nVisionAddOn(BaseVisionAddOn):
    """
    Vision add-on for Gemma3n model. Uses mlx-vlm vision components of Gemma3n.
    """

    def __init__(self, model_path: Path):
        """Load Gemma3n vision components from *model_path*."""
        super().__init__()

        # The gemma3n weights were re-uploaded by google on 20250710; the
        # re-upload transposed two of the weight axes. Detect pre-20250710
        # uploads via a config key that only the old uploads carried.
        self.using_legacy_weights = False
        with open(model_path / "config.json", "r") as f:
            config_json = json.load(f)
        if "query_pre_attn_scalar" in config_json.get("text_config", {}):
            self.using_legacy_weights = True

        config, config_dict = load_and_parse_config(
            model_path=model_path,
            model_config_class=Gemma3nModelConfig,
            vision_config_class=Gemma3nVisionConfig,
            text_config_class=Gemma3nTextConfig,
        )

        components = Gemma3nVisionComponents(
            vision_tower=Gemma3nVisionTower(config.vision_config),
            embed_vision=Gemma3nMultimodalEmbedder(
                config.vision_config, config.text_config
            ),
        )
        if self.using_legacy_weights:
            # NOTE(review): presumably the legacy checkpoints have no bias on the
            # stem conv, so the freshly initialized one is dropped before weight
            # loading — TODO confirm against the legacy checkpoint layout.
            try:
                if hasattr(components.vision_tower, "timm_model"):
                    del components.vision_tower.timm_model.conv_stem.conv.bias
            except AttributeError:
                logger.warning("Could not delete legacy bias: attribute not found")
        processor = load_processor(model_path=model_path, add_detokenizer=True)
        vision_weights = load_and_filter_weights(model_path, components)
        vision_weights = sanitize_weights(
            components.vision_tower.__class__, vision_weights, config.vision_config
        )
        maybe_apply_quantization(components, config_dict, vision_weights)
        prepare_components(components, vision_weights)

        logger.info(
            f"Vision add-on loaded successfully from {model_path}",
        )

        self.vision_tower = components.vision_tower
        self.embed_vision = components.embed_vision
        self.config = config
        self.processor = processor

    def compute_embeddings(
        self,
        text_model: nn.Module,
        prompt_tokens: mx.array,
        images_b64: list[str],
        max_size: tuple[int, int] | None,
    ) -> tuple[mx.array, mx.array]:
        """Compute input_ids and embeddings for text with images."""
        input_ids, pixel_values, attention_mask, other_model_inputs = (
            common_process_prompt_with_images(
                prompt_tokens=prompt_tokens,
                images_b64=images_b64,
                processor=self.processor,
                config=self.config,
                max_size=max_size,
            )
        )
        assert input_ids is not None

        # See mlx_vlm.models.gemma3n.gemma3n.Model.get_input_embeddings.
        # Based on mlx-vlm commit ebafa5a789ed1a8e050b8366ae4e845dbe640b90. The
        # vision_mask bounds differ slightly from mlx-vlm, but the calculations
        # should be equivalent: (vision vocab offset + size) == audio vocab offset.
        inputs_embeds = text_model.model.language_model.embed_tokens(input_ids)
        vision_mask = mx.logical_and(
            input_ids >= self.embed_vision.vocab_offset,
            input_ids < self.embed_vision.vocab_offset + self.embed_vision.vocab_size,
        )
        # Out-of-range positions are embedded with a harmless in-vocab dummy id
        # and then masked back out below.
        dummy_vision_token_id = (
            self.embed_vision.vocab_offset + self.embed_vision.vocab_size - 1
        )
        vision_tokens = mx.where(vision_mask, input_ids, dummy_vision_token_id)
        vision_embeds_flat = self.embed_vision(input_ids=vision_tokens)
        inputs_embeds = mx.where(
            vision_mask[..., None], vision_embeds_flat, inputs_embeds
        )

        if self.using_legacy_weights:
            # The array is still in pytorch format (NCHW) here; legacy weights
            # expect the H/W axes swapped.
            pixel_values = pixel_values.swapaxes(2, 3)

        # Run the vision tower, then embed into language-model space.
        image_features = Gemma3nCombinedModel.get_image_features(
            pixel_values,
            self.vision_tower,
            self.config,
            self.embed_vision,
        )

        # Mask marking where image embeddings belong in the sequence.
        special_modality_mask = mx.expand_dims(
            input_ids == self.config.image_token_id, -1
        )
        special_modality_mask = mx.broadcast_to(
            special_modality_mask, inputs_embeds.shape
        )

        # Interleave image and text embeddings per the modality mask.
        final_inputs_embeds = Gemma3nCombinedModel.merge_multimodal_and_text(
            inputs_embeds, image_features, special_modality_mask, "image"
        )
        # remove batch dimension
        return input_ids.squeeze(0), final_inputs_embeds.squeeze(0)
def load_and_parse_config(
    model_path: Path,
    model_config_class: Any,
    vision_config_class: Any,
    text_config_class: Any,
) -> Tuple[Any, dict]:
    """
    Load and parse vision model configuration from config.json.

    Args:
        model_path: Path to the model directory
        model_config_class: Configuration class for the model
        vision_config_class: Configuration class for vision component
        text_config_class: Configuration class for text component

    Returns:
        Tuple of (fully initialized config object, raw config dict — the dict is
        needed later for quantization).
    """
    config_path = model_path / "config.json"
    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file not found at {config_path}")

    config_dict = json.loads(config_path.read_text())
    config = model_config_class.from_dict(config_dict)
    config.vision_config = vision_config_class.from_dict(config.vision_config)
    config.text_config = text_config_class.from_dict(config.text_config)

    # Hack for lfm2_vl, which uses `vision_feature_layer` to reduce the number
    # of actual layers:
    # https://github.com/Blaizzy/mlx-vlm/blob/f02d63e8f5b521e8c75f129a63d2660efd132693/mlx_vlm/models/lfm2_vl/lfm2_vl.py#L98-L101
    is_lfm2 = (
        hasattr(config.text_config, "model_type")
        and "lfm2" in config.text_config.model_type
    )
    if is_lfm2:
        vision_feature_layer = config_dict.get("vision_feature_layer", -1)
        if vision_feature_layer != -1:
            config.vision_config.num_hidden_layers += vision_feature_layer + 1
            config_dict["vision_config"]["num_hidden_layers"] = (
                config.vision_config.num_hidden_layers
            )

    return config, config_dict


class VisionComponents(nn.Module):
    """Container pairing a vision tower with an optional multimodal projector."""

    def __init__(
        self, vision_tower: nn.Module, multi_modal_projector: nn.Module | None = None
    ):
        super().__init__()
        self.vision_tower = vision_tower
        self.multi_modal_projector = multi_modal_projector


def create_vision_components(
    config: Any,
    vision_tower_class: Type[nn.Module],
    multi_modal_projector_class: Type[nn.Module] | None,
) -> VisionComponents:
    """
    Create vision model components and wrap them in a container module.

    Args:
        config: The fully initialized config object
        vision_tower_class: The vision tower model class
        multi_modal_projector_class: The multi-modal projector class (or None)

    Returns:
        The container module holding both components.
    """
    tower = vision_tower_class(config.vision_config)
    projector = (
        multi_modal_projector_class(config) if multi_modal_projector_class else None
    )
    return VisionComponents(tower, projector)
+ + Args: + model_path: Path to the model directory + components: The vision components container module + + Returns: + Dictionary containing filtered vision-related weights + """ + # Load model weights + weight_files = glob.glob(str(model_path / "*.safetensors")) + if not weight_files: + raise FileNotFoundError( + f"Failed to load vision add-on: {model_path} does not contain any safetensors files" + ) + + # Load and filter weights + weights = {} + for wf in weight_files: + weights.update(mx.load(wf)) + + # Filter only vision-related weights + vision_weights = { + k: v + for k, v in weights.items() + if any(k.startswith(name) for name in components.children().keys()) + } + + return vision_weights + + +def maybe_apply_quantization( + components: nn.Module, + config_dict: dict, + vision_weights: dict, +) -> None: + """ + Apply quantization to vision components if specified in config. + + Args: + components: The vision components container module + config_dict: Raw config dictionary containing quantization settings + vision_weights: The vision-related weights dictionary + """ + # Apply quantization if specified in config + if (quantization := config_dict.get("quantization", None)) is not None: + # Copied from mlx_vlm/utils.py at commit + # 65ecc837f24d0f8b138f300c7efef87f00fba74d + skip_vision = config_dict.get("vision_config", {}).get("skip_vision", False) + + def get_class_predicate(p, m): + # Always skip vision and audio models + if skip_multimodal_module(p) and skip_vision: + return False + # Handle custom per layer quantizations + if p in config_dict["quantization"]: + return config_dict["quantization"][p] + if not hasattr(m, "to_quantized"): + return False + # Skip layers not divisible by 64 + if hasattr(m, "weight") and m.weight.size % 64 != 0: + return False + # Handle legacy models which may not have everything quantized + return f"{p}.scales" in vision_weights + + quantize_kwargs = {} + if "bits" in quantization: + quantize_kwargs["bits"] = 
quantization["bits"] + if "group_size" in quantization: + quantize_kwargs["group_size"] = quantization["group_size"] + nn.quantize( + components, + class_predicate=get_class_predicate, + **quantize_kwargs, + ) + + +def prepare_components( + components: nn.Module, + vision_weights: dict, +) -> None: + """ + Prepare vision components by loading weights and setting to evaluation mode. + + Args: + components: The vision components container module + vision_weights: The vision-related weights dictionary + """ + # Load weights into the model + components.load_weights(list(vision_weights.items())) + + # Always load weights to memory here + mx.eval(components.parameters()) + + # Set model to evaluation mode + components.eval() + + +def load_vision_addon( + model_path: Path, + model_config_class: Any, + vision_config_class: Any, + text_config_class: Any, + vision_tower_class: Type[nn.Module], + multi_modal_projector_class: Type[nn.Module] | None, + logger: logging.Logger, + processor_kwargs: dict | None = None, +) -> Tuple[nn.Module, nn.Module | None, Any, Any]: + """ + Load vision add-on components, configuration, and processor. 
+ + Args: + model_path: Path to the model directory + model_config_class: Configuration class for the model + vision_config_class: Configuration class for vision component + text_config_class: Configuration class for text component + vision_tower_class: The vision tower model class + multi_modal_projector_class: The multi-modal projector class + logger: logging.Logger + + Returns: + Tuple containing: + - The vision tower module + - The multi-modal projector module + - The model configuration + - The processor for handling images and text + """ + # Load and parse configuration + config, config_dict = load_and_parse_config( + model_path, model_config_class, vision_config_class, text_config_class + ) + + # Create model components + components = create_vision_components( + config, + vision_tower_class, + multi_modal_projector_class, + ) + + # Load processor + processor = load_processor( + model_path=model_path, + add_detokenizer=True, + **(processor_kwargs or {}), + ) + + # Load and filter weights + vision_weights = load_and_filter_weights(model_path, components) + + # Sanitize weights for vision tower + vision_weights = sanitize_weights( + components.vision_tower.__class__, vision_weights, config.vision_config + ) + + # Apply quantization if specified in config + maybe_apply_quantization(components, config_dict, vision_weights) + + # Prepare components (load weights and set to eval mode) + prepare_components(components, vision_weights) + + logger.info( + f"Vision add-on loaded successfully from {model_path}", + ) + + return components.vision_tower, components.multi_modal_projector, config, processor diff --git a/server/backend/mlx_engine/model_kit/vision_add_ons/mistral3.py b/server/backend/mlx_engine/model_kit/vision_add_ons/mistral3.py new file mode 100644 index 0000000..bf308bd --- /dev/null +++ b/server/backend/mlx_engine/model_kit/vision_add_ons/mistral3.py @@ -0,0 +1,124 @@ +import logging +from pathlib import Path + +from mlx import nn +import mlx.core as mx + 
import logging
from pathlib import Path

from mlx import nn
import mlx.core as mx

from mlx_vlm.models.mistral3 import (
    VisionModel as Mistral3VisionTower,
    ModelConfig as Mistral3ModelConfig,
    VisionConfig as Mistral3VisionConfig,
    TextConfig as Mistral3TextConfig,
    Model as Mistral3CombinedModel,
)
from mlx_vlm.models.mistral3.mistral3 import Mistral3MultiModalProjector
from .base import BaseVisionAddOn
from .process_prompt_with_images import (
    common_process_prompt_with_images,
)
from .load_utils import load_vision_addon

logger = logging.getLogger(__name__)


class Mistral3VisionAddOn(BaseVisionAddOn):
    """
    Vision add-on for Mistral3 models.
    """

    def __init__(self, model_path: Path):
        """Initialize Mistral3VisionAddOn with vision components loaded from the given path."""
        super().__init__()

        # The LM Studio Mistral Small 3.2 repackaging ships a processor config
        # missing these fields, so they are supplied explicitly.
        processor_kwargs: dict | None = None
        if self._is_lmstudio_mistral_3_2_small(model_path):
            processor_kwargs = {
                "patch_size": 14,
                "spatial_merge_size": 2,
            }
            logger.info(
                "Detected LM Studio Mistral Small 3.2 model. "
                f"Using custom processor kwargs: {processor_kwargs}"
            )

        self.vision_tower, self.multi_modal_projector, self.config, self.processor = (
            load_vision_addon(
                model_path=model_path,
                model_config_class=Mistral3ModelConfig,
                vision_config_class=Mistral3VisionConfig,
                text_config_class=Mistral3TextConfig,
                vision_tower_class=Mistral3VisionTower,
                multi_modal_projector_class=Mistral3MultiModalProjector,
                logger=logger,
                processor_kwargs=processor_kwargs,
            )
        )

    def compute_embeddings(
        self,
        text_model: nn.Module,
        prompt_tokens: mx.array,
        images_b64: list[str],
        max_size: tuple[int, int] | None,
    ) -> tuple[mx.array, mx.array]:
        """
        Compute embeddings for text with images.

        This method is heavily based on the mlx-vlm's mistral3 `get_input_embeddings`
        https://github.com/Blaizzy/mlx-vlm/blob/2c3014fd40962bd5320ad611502e7e26cae08926/mlx_vlm/models/mistral3/mistral3.py#L240-L279

        Returns a (token_ids, embeddings) pair with the batch dimension removed.
        """

        input_ids, pixel_values, _, other_model_inputs = (
            common_process_prompt_with_images(
                prompt_tokens=prompt_tokens,
                images_b64=images_b64,
                processor=self.processor,
                config=self.config,
                max_size=max_size,
            )
        )

        # Per-image (height, width) pairs needed by the projector's patch merging.
        image_sizes_list = other_model_inputs["image_sizes"]
        image_sizes = mx.array(image_sizes_list)

        if pixel_values is None:
            # Text-only fallback: embed the tokens directly.
            return input_ids.squeeze(0), text_model.language_model.model.embed_tokens(
                input_ids
            ).squeeze(0)

        # Get the input embeddings from the language model
        inputs_embeds = text_model.language_model.model.embed_tokens(input_ids)

        # Get the output hidden states from the vision model
        if isinstance(pixel_values, list):
            pixel_values = mx.concatenate(
                [mx.array(pv)[None, ...] for pv in pixel_values], axis=0
            )
        if pixel_values.ndim == 3:
            # Single image without batch dim; add it back.
            pixel_values = pixel_values[None, ...]

        # Pass pixel_values as list of images, as each image is individually run through conv2d and position encoding
        # Reference code from transformers: https://github.com/huggingface/transformers/blob/main/src/transformers/models/pixtral/modeling_pixtral.py#L479C9-L479C21
        # and mistral_inference: https://github.com/mistralai/mistral-inference/blob/main/src/mistral_inference/vision_encoder.py#L85
        # NOTE(review): the transpose assumes NCHW input being converted to NHWC — confirm
        *_, hidden_states = self.vision_tower(
            pixel_values.transpose(0, 2, 3, 1),
            output_hidden_states=True,
        )
        # Select the hidden states from the desired layer
        selected_image_feature = hidden_states[self.config.vision_feature_layer]

        # Pass image features through the multi-modal projector
        image_features = self.multi_modal_projector(selected_image_feature, image_sizes)

        # Insert special image tokens in the input_ids
        final_inputs_embeds = Mistral3CombinedModel.merge_input_ids_with_image_features(
            self.config.image_token_index, image_features, inputs_embeds, input_ids
        )
        # remove batch dimension
        return input_ids.squeeze(0), final_inputs_embeds.squeeze(0)

    @staticmethod
    def _is_lmstudio_mistral_3_2_small(model_path: Path) -> bool:
        # Substring match on the cache path; relies on HF repo id appearing in the path.
        return "lmstudio-community/Mistral-Small-3.2-24B-Instruct-2506-MLX" in str(
            model_path
        )
import logging
from pathlib import Path

from mlx import nn
import mlx.core as mx

from mlx_vlm.models.pixtral import (
    VisionModel as PixtralVisionTower,
    ModelConfig as PixtralModelConfig,
    VisionConfig as PixtralVisionConfig,
    TextConfig as PixtralTextConfig,
    Model as PixtralCombinedModel,  # for merge_input_ids_with_image_features
)
from mlx_vlm.models.pixtral.pixtral import (
    LlavaMultiModalProjector as PixtralMultiModalProjector,
)

from .base import BaseVisionAddOn
from .process_prompt_with_images import (
    common_process_prompt_with_images,
)
from .load_utils import load_vision_addon

logger = logging.getLogger(__name__)


class PixtralVisionAddOn(BaseVisionAddOn):
    """
    Vision add-on for Pixtral model. Uses mlx-vlm vision components of Pixtral.
    """

    def __init__(self, model_path: Path):
        """Initialize PixtralVisionAddOn with vision components loaded from the given path."""
        super().__init__()

        # Load vision model components, configuration, and processor
        self.vision_tower, self.multi_modal_projector, self.config, self.processor = (
            load_vision_addon(
                model_path=model_path,
                model_config_class=PixtralModelConfig,
                vision_config_class=PixtralVisionConfig,
                text_config_class=PixtralTextConfig,
                vision_tower_class=PixtralVisionTower,
                multi_modal_projector_class=PixtralMultiModalProjector,
                logger=logger,
            )
        )

    def compute_embeddings(
        self,
        text_model: nn.Module,
        prompt_tokens: mx.array,
        images_b64: list[str],
        max_size: tuple[int, int] | None,
    ) -> tuple[mx.array, mx.array]:
        """Compute input_ids and embeddings for text with images.

        Returns a (token_ids, embeddings) pair with the batch dimension removed.
        """
        input_ids, pixel_values, attention_mask, other_model_inputs = (
            common_process_prompt_with_images(
                prompt_tokens=prompt_tokens,
                images_b64=images_b64,
                processor=self.processor,
                config=self.config,
                max_size=max_size,
            )
        )
        input_embeddings = text_model.language_model.model.embed_tokens(input_ids)

        # Normalize pixel_values to a single batched array.
        if isinstance(pixel_values, list):
            pixel_values = mx.concatenate(
                [mx.array(pv)[None, ...] for pv in pixel_values], axis=0
            )
        if pixel_values.ndim == 3:
            # Single image without batch dim; add it back.
            pixel_values = pixel_values[None, ...]

        # Process image through vision tower
        # NOTE(review): the transpose assumes NCHW input being converted to NHWC — confirm
        *_, hidden_states = self.vision_tower(
            pixel_values.transpose(0, 2, 3, 1),
            output_hidden_states=True,
        )
        # Select the hidden states from the desired layer
        selected_image_feature = hidden_states[self.config.vision_feature_layer]

        # Pass image features through the multi-modal projector
        image_features = self.multi_modal_projector(selected_image_feature)

        # Insert special image tokens in the input_ids
        final_inputs_embeds = PixtralCombinedModel.merge_input_ids_with_image_features(
            self.config.image_token_index, image_features, input_embeddings, input_ids
        )
        # remove batch dimension
        return input_ids.squeeze(0), final_inputs_embeds.squeeze(0)
class ProcessedImagePrompt(NamedTuple):
    # input_ids: tokenized prompt with image placeholder tokens expanded
    input_ids: mx.array
    # pixel_values: preprocessed image tensor(s)
    pixel_values: mx.array
    # attention_mask: mask produced by the processor
    attention_mask: mx.array
    # other_inputs: any remaining model-specific inputs (e.g. image_sizes)
    other_inputs: dict


def common_process_prompt_with_images(
    prompt_tokens: mx.array,
    images_b64: List[str],
    processor: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    config,  # expected to be a ModelConfig object as defined by mlx-vlm. Can vary by model
    max_size: tuple[int, int] | None,
) -> ProcessedImagePrompt:
    """
    Common prompt processing used by mlx-vlm vision add-ons.
    Returns a named tuple with all processed inputs.

    Args:
        prompt_tokens: Input prompt tokens
        images_b64: List of base64-encoded images
        processor: Tokenizer/processor for the model
        config: Model configuration object
        max_size: Maximum image size as (width, height) tuple. If None, no resizing.

    Raises:
        ValueError: If images_b64 is empty.
    """
    if len(images_b64) == 0:
        raise ValueError("Images must be non-empty")

    # Reconstruct the prompt text from the token ids so the processor can
    # re-tokenize it together with the images.
    detokenizer = processor.detokenizer
    detokenizer.reset()
    # Plain loop instead of a side-effect-only comprehension: add_token mutates
    # the detokenizer and the list of return values was discarded.
    for token in prompt_tokens:
        detokenizer.add_token(token)
    detokenizer.finalize()
    prompt = detokenizer.text

    logger.info(f"Prompt dump: {prompt}\n")

    images = convert_to_pil(images_b64)
    images = custom_resize(images, max_size=max_size)

    # Locate the image placeholder token id; config layout varies by model family.
    if hasattr(config, "image_token_index"):
        image_token_index = config.image_token_index
    elif (
        hasattr(config, "vision_config")
        and config.vision_config is not None
        and hasattr(config.vision_config, "image_token_id")
    ):
        image_token_index = config.vision_config.image_token_id
    else:
        image_token_index = None

    inputs = prepare_inputs(
        processor=processor,
        images=images,
        prompts=prompt,
        image_token_index=image_token_index,
        resize_shape=None,
    )

    input_ids = inputs["input_ids"]
    pixel_values = inputs["pixel_values"]
    attention_mask = inputs["attention_mask"]
    # Forward everything else (model-specific extras such as image_sizes) untouched.
    other_model_inputs = {
        k: v
        for k, v in inputs.items()
        if k not in ["input_ids", "pixel_values", "attention_mask"]
    }

    return ProcessedImagePrompt(
        input_ids=input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        other_inputs=other_model_inputs,
    )
class RepetitionPenaltyProcessor:
    """Repetition-penalty logits processor that is aware of cached history.

    Wraps the standard mlx-lm repetition penalty processor
    (https://github.com/ml-explore/mlx-lm/blob/69195f8632869d35306d085de7dc4e7d6954baac/mlx_lm/sample_utils.py#L245-L255)
    so that tokens already present in the prompt cache count toward the
    penalty window without recomputing their logits.
    """

    def __init__(
        self,
        token_history: list[int],
        repetition_penalty: float,
        repetition_context_size: int,
    ):
        self.token_history = token_history
        self.repetition_context_size = repetition_context_size
        self.repetition_penalty_function = make_repetition_penalty(
            repetition_penalty, repetition_context_size
        )

    def __call__(self, tokens: mx.array, logits: mx.array) -> mx.array:
        """Penalize logits, folding in historical tokens when the current
        generation is shorter than the penalty context window.

        Args:
            tokens: Tokens produced so far in this generation.
            logits: Logits to penalize.
        """
        # How many slots of the context window are not yet filled by `tokens`?
        shortfall = max(self.repetition_context_size - len(tokens), 0)
        prior_tokens = self.token_history[-shortfall:] if shortfall > 0 else []
        window = mx.concat(
            [mx.array(prior_tokens, dtype=mx.int64), tokens]
        )
        return self.repetition_penalty_function(window, logits)
"full_stop", "partial_match", "no_match", "multi_byte" +] + +REPLACEMENT_CHAR = "\ufffd" + + +class StopStringProcessorResult(NamedTuple): + """Result of stop string processing containing status and details.""" + + status: StopStringProcessorStatus + stop_string: Optional[str] = None # populated if status is "full_stop" + # sequence of tokens that the stop_string was found in + stop_tokens: Optional[List[int]] = None # populated if status is "full_stop" + + +class StopStringProcessor: + """State-fully processes tokens to check for stop strings during generation.""" + + def __init__(self, stop_strings: List[str], tokenizer): + """ + Args: + stop_strings: List of strings that should stop generation if found + tokenizer: Tokenizer instance for encoding token IDs to text + + Raises: + ValueError: If stop_strings is empty or contains invalid values + TypeError: If stop_strings contains non-string values + """ + if not stop_strings: + raise ValueError("Must provide at least one stop string") + + if not all(isinstance(s, str) for s in stop_strings): + raise TypeError("All stop strings must be strings") + + if any(not stop_string for stop_string in stop_strings): + raise ValueError("Stop strings cannot be empty") + + self.stop_strings = stop_strings + self.tokenizer = tokenizer + self.token_id_buffer = [] + + def process_token(self, token: int) -> StopStringProcessorResult: + """Process a new token and check how it relates to stop strings. 
+ + Args: + token: The new token to process + + Returns: + StopStringProcessorResult indicating the state of stop string detection + """ + if len(self.stop_strings) == 0: + return StopStringProcessorResult( + status="no_match", stop_string=None, stop_tokens=None + ) + + self.token_id_buffer.append(token) + + result = self._stopping_criteria( + string=self.tokenizer.decode(self.token_id_buffer), + stop_strings=self.stop_strings, + ) + + if result.status == "no_match": + # Can clear the buffer in no partial or full matches with stop sequences + self.token_id_buffer = [] + return StopStringProcessorResult( + status="no_match", stop_string=None, stop_tokens=None + ) + + elif result.status == "partial_match": + return StopStringProcessorResult( + status="partial_match", stop_string=None, stop_tokens=None + ) + + elif result.status == "multi_byte": + return StopStringProcessorResult( + status="multi_byte", stop_string=None, stop_tokens=None + ) + + elif result.status == "full_stop": + return StopStringProcessorResult( + status="full_stop", + stop_string=result.stop_string, + stop_tokens=self.token_id_buffer, + ) + + else: + raise ValueError(f"Unknown StopProcessorStatus: {result.status}") + + class _StoppingCriteriaResult(NamedTuple): + status: StopStringProcessorStatus + stop_string: Optional[str] = None # populated if status is "full_stop" + + def _stopping_criteria( + self, + string: str, + stop_strings: List[str], + ) -> _StoppingCriteriaResult: + """Check if stop strings match or partially match the input string + + Args: + string: The string to check for stop strings + stop_strings: List of strings that should stop generation if found + + Returns: + StopStringProcessorResult indicating match status and stop string if matched + + Checks stopping conditions in priority order: + 1. Incomplete UTF-8 string + 2. Exact stop string match + 3. 
Partial stop string match + """ + + result = ( + self._check_incomplete_utf8(string) + or self._check_full_text_match(string, stop_strings) + or self._check_partial_text_match(string, stop_strings) + or self._StoppingCriteriaResult(status="no_match", stop_string=None) + ) + + return result + + def _check_incomplete_utf8(self, string: str) -> Optional[_StoppingCriteriaResult]: + if len(string) == 0 or string[-1] == REPLACEMENT_CHAR: + return self._StoppingCriteriaResult(status="multi_byte", stop_string=None) + return None + + def _check_full_text_match( + self, string: str, stop_strings: List[str] + ) -> Optional[_StoppingCriteriaResult]: + """Find earliest full text match of any stop sequence.""" + earliest_match = {"position": float("inf"), "stop_string": None} + + for stop_string in stop_strings: + position = string.find(stop_string) + + if position != -1 and position < earliest_match["position"]: + earliest_match.update( + {"position": position, "stop_string": stop_string} + ) + + if earliest_match["stop_string"] is not None: + return self._StoppingCriteriaResult( + status="full_stop", stop_string=earliest_match["stop_string"] + ) + return None + + + def _check_partial_text_match( + self, string: str, stop_strings: List[str] + ) -> Optional[_StoppingCriteriaResult]: + """Check for partial matches with any stop sequence.""" + for stop_string in stop_strings: + if self._sequence_overlap(string, stop_string): + return self._StoppingCriteriaResult( + status="partial_match", stop_string=None + ) + return None + + def _sequence_overlap(self, s1: Sequence, s2: Sequence) -> bool: + """ + Checks if a suffix of s1 has overlap with a prefix of s2 + + Args: + s1 (Sequence): The first sequence + s2 (Sequence): The second sequence + + Returns: + bool: If the two sequences have overlap + """ + max_overlap = min(len(s1), len(s2)) + return any(s1[-i:] == s2[:i] for i in range(1, max_overlap + 1)) diff --git a/server/backend/mlx_engine/utils/disable_hf_download.py 
from functools import wraps
import sys
import huggingface_hub

# Store the original function before we patch anything
_original_snapshot_download = huggingface_hub.snapshot_download


@wraps(_original_snapshot_download)
def snapshot_download(*args, **kwargs):
    """
    Wrapper around huggingface_hub.snapshot_download that disables it

    Raises:
        RuntimeError: always — downloads must never happen through this path.
    """
    raise RuntimeError(
        "Internal error: Cannot proceed without downloading from huggingface. Please report this error to the LM Studio team."
    )


def patch_huggingface_hub():
    """
    Patch the huggingface_hub module to use our local-only snapshot_download.
    This ensures that any import of snapshot_download from huggingface_hub
    will use our wrapped version.

    NOTE: only catches `from huggingface_hub import snapshot_download` done
    AFTER this patch runs; earlier from-imports keep the original reference.
    """
    huggingface_hub.snapshot_download = snapshot_download
    # Also patch the module in sys.modules to ensure any other imports get our version
    sys.modules["huggingface_hub"].snapshot_download = snapshot_download
from typing import Optional

# Taken from https://github.com/ggml-org/llama.cpp/blob/971f245/src/llama-vocab.cpp#L1807-L1814
DEFAULT_EOT_TOKENS = [
    "<|eot_id|>",
    "<|im_end|>",
    "<|end|>",
    "",
    "<|endoftext|>",
    "",
    "_",
    "<|end▁of▁sentence|>",
]

# Model families whose EOT markers differ from the common defaults.
MODEL_TYPE_TO_EOT_TOKENS = {"gpt_oss": ["<|return|>", "<|call|>"]}


def _get_eot_token_ids(tokenizer, model_type: Optional[str] = None) -> set[int]:
    """
    Get the token ID of common end-of-text tokens, using the provided tokenizer.

    If the EOT token str cannot be converted into a single token ID, it is discarded as a candidate.
    """
    if (
        isinstance(model_type, str)
        and len(MODEL_TYPE_TO_EOT_TOKENS.get(model_type, [])) > 0
    ):
        eot_tokens = MODEL_TYPE_TO_EOT_TOKENS[model_type]
    else:
        eot_tokens = DEFAULT_EOT_TOKENS

    # Convert EOT tokens to token IDs
    eot_token_ids = [
        tokenizer.encode(eot_str, add_special_tokens=False) for eot_str in eot_tokens
    ]

    # Keep only candidates that map to exactly one token id: either a bare int
    # or a single-element list (encode() return shape varies by tokenizer).
    single_int = [token_id for token_id in eot_token_ids if isinstance(token_id, int)]
    single_element_list = [
        token_id[0]
        for token_id in eot_token_ids
        if isinstance(token_id, list) and len(token_id) == 1
    ]

    return set(single_int + single_element_list)


def sanitize_eos_tokens(model_kit: "ModelKit | VisionModelKit") -> None:
    """Remove (probably) incorrect EOS tokens and add common EOT token ids.

    Mutates model_kit.tokenizer in place: rebuilds eos_token_ids and, if the
    primary eos_token_id is no longer in the set, repoints it.

    Raises:
        RuntimeError: if the cleaned EOS token set would be empty.
    """
    tokenizer = model_kit.tokenizer
    cleaned_ids = set()
    # `token_id` (was `id`) avoids shadowing the builtin.
    for token_id in tokenizer.eos_token_ids:
        text = tokenizer.decode(token_id)
        # Specific override for RNJ-1
        if model_kit.model_type == "gemma3_text" and token_id == 1 and text == '"':
            continue
        cleaned_ids.add(token_id)
    cleaned_ids = cleaned_ids.union(_get_eot_token_ids(tokenizer, model_kit.model_type))

    if len(cleaned_ids) == 0:
        raise RuntimeError(
            f"EOS tokens cannot be empty. Before cleaning, the tokens were {tokenizer.eos_token_ids}"
        )
    tokenizer.eos_token_ids = cleaned_ids

    if tokenizer.eos_token_id not in tokenizer.eos_token_ids:
        tokenizer.eos_token_id = min(tokenizer.eos_token_ids)
    # Keep the wrapped tokenizer (if any) in sync with the primary id.
    if hasattr(tokenizer, "_tokenizer") and hasattr(
        tokenizer._tokenizer, "eos_token_id"
    ):
        tokenizer._tokenizer.eos_token_id = tokenizer.eos_token_id
Note the missing + whitespace before `world` + """ + if model_type not in _LEGACY_MISTRAL_MODEL_TYPES: + return + logger.info("Detected mistral model. Checking if tokenizer needs fixing...") + if not isinstance(tokenizer._tokenizer, LlamaTokenizer): + logger.info(f"Tokenizer is of type {type(tokenizer._tokenizer)}. Skipping fix.") + return + if not _tokenizer_is_broken(tokenizer): + logger.info("Tokenizer working as expected.") + return + + # Fix pre-tokenizer + try: + tok = Tokenizer.from_file(str(model_path / "tokenizer.json")) + tokenizer._tokenizer._tokenizer.pre_tokenizer = tok.pre_tokenizer + except Exception: + logger.warning(f"Failed to fix tokenizer: {traceback.format_exc()}.") + return + + if _tokenizer_is_broken(tokenizer): + logger.warning("Tokenizer could not be fixed.") + return + + logger.info("Successfully fixed tokenizer.") + + +def _tokenizer_is_broken(tokenizer: TokenizerWrapper) -> bool: + """ + `["about", "Paris"]` shows us that the tokenization is broken because + the whitespace is missing between `about` and `Paris`. + """ + test_prompt = "Tell me about Paris" + tokens = tokenizer.tokenize(test_prompt) + return tokens[-2:] == ["about", "Paris"] diff --git a/server/backend/mlx_engine/utils/image_utils.py b/server/backend/mlx_engine/utils/image_utils.py new file mode 100644 index 0000000..da47477 --- /dev/null +++ b/server/backend/mlx_engine/utils/image_utils.py @@ -0,0 +1,97 @@ +import base64 +from io import BytesIO +from typing import List +import PIL +import logging + +logger = logging.getLogger(__name__) + + +def convert_to_pil(images_b64: List[str]) -> list[PIL.Image.Image]: + """Convert a list of base64 strings to PIL Images""" + return [ + PIL.Image.open(BytesIO(base64.b64decode(img))).convert("RGB") + for img in images_b64 or [] + ] + + +def custom_resize( + pil_images: list[PIL.Image.Image], + *, + max_size: tuple[int, int] | None, + should_pad: bool = True, +): + """ + Resize and optionally pad a list of PIL images. 
def convert_to_pil(images_b64: List[str]) -> list[PIL.Image.Image]:
    """Convert a list of base64 strings to PIL Images (RGB)."""
    # NOTE(review): `PIL.Image` is only an attribute of `PIL` once the Image
    # submodule has been imported somewhere; prefer `import PIL.Image` at the
    # top of this module — TODO confirm module import is sufficient.
    return [
        PIL.Image.open(BytesIO(base64.b64decode(img))).convert("RGB")
        for img in images_b64 or []
    ]


def custom_resize(
    pil_images: list[PIL.Image.Image],
    *,
    max_size: tuple[int, int] | None,
    should_pad: bool = True,
):
    """
    Resize and optionally pad a list of PIL images.

    This function resizes images that exceed the specified maximum dimensions,
    maintaining their aspect ratios. If there is more than one image, it also
    pads all images to the same size.

    Args:
        pil_images (list): A list of PIL Image objects to be processed.
        max_size (tuple): Maximum allowed dimensions (width, height) for the images.
                          If None, no resizing is performed.
        should_pad (bool): Whether to pad the images to the same size.
                           Defaults to True.

    Returns:
        list: A list of processed PIL Image objects. If there was only one input image,
              it returns the resized image without padding. If there were multiple input
              images, it returns padded images of uniform size.

    Side effects:
        Logs progress and status messages via the module logger.

    Raises:
        ValueError: If max_size is not a (width, height) tuple of positive ints.
    """
    # Validate max_size parameter
    if max_size is not None:
        if not isinstance(max_size, tuple) or len(max_size) != 2:
            raise ValueError(
                "max_size must be a tuple of (width, height), e.g., (1024, 1024)"
            )
        if not all(isinstance(dim, int) and dim > 0 for dim in max_size):
            raise ValueError("max_size dimensions must be positive integers")

    resized_images = []
    max_width, max_height = 0, 0

    for i, img in enumerate(pil_images):
        original_width, original_height = img.width, img.height
        if original_height == 0:
            # Degenerate image: keep as-is. It intentionally does not
            # contribute to the max width/height used for padding.
            logger.warning(f"Image {i+1} has zero height. Skipping resizing.")
            resized_images.append(img)
            continue

        if max_size is not None and (
            img.width > max_size[0] or img.height > max_size[1]
        ):
            # Uniform scale preserves aspect ratio; clamp to 1.0 so we never upscale.
            scale = min(max_size[0] / original_width, max_size[1] / original_height, 1.0)
            new_width = int(original_width * scale)
            new_height = int(original_height * scale)
            img = img.resize((new_width, new_height), PIL.Image.LANCZOS)
            logger.info(
                f"Image {i + 1}: Resized from {original_width}x{original_height} to {new_width}x{new_height}\n",
            )

        max_width = max(max_width, img.width)
        max_height = max(max_height, img.height)

        resized_images.append(img)

    if len(pil_images) > 1 and should_pad:
        logger.info(
            f"[mlx-engine] Maximum dimensions: {max_width}x{max_height}. "
            f"Adding padding so that all images are the same size.\n",
        )

        final_images = []
        for i, img in enumerate(resized_images):
            # Center each image on a black canvas of the common maximum size.
            new_img = PIL.Image.new("RGB", (max_width, max_height), (0, 0, 0))
            paste_x = (max_width - img.width) // 2
            paste_y = (max_height - img.height) // 2
            new_img.paste(img, (paste_x, paste_y))
            final_images.append(new_img)
        return final_images
    else:
        return resized_images
cache quantization parameters. + + Args: + kv_bits: Number of bits for quantization. If None, disables quantization. + kv_group_size: Group size for quantization. Defaults to 64 if quantization enabled. + quantized_kv_start: Step to begin quantization. Defaults to 0 if quantization enabled. + + Returns: + Tuple of (kv_bits, kv_group_size, quantized_kv_start), all None if quantization disabled. + + Raises: + ValueError: If kv_bits is invalid or missing when other params are set. + """ + if any([kv_group_size, quantized_kv_start]) and kv_bits is None: + raise ValueError("Enabling KV Cache Quantization requires kv_bits to be set") + + if kv_bits is None: + return None, None, None + + # defaults taken from here: + # https://github.com/ml-explore/mlx-examples/blob/3d793ec/llms/mlx_lm/utils.py#L352-L353 + if kv_group_size is None: + kv_group_size = 64 + if quantized_kv_start is None: + quantized_kv_start = 0 + + if kv_bits not in VALID_KV_BITS: + raise ValueError(f"Invalid kv_bits value. Must be one of {VALID_KV_BITS}") + if kv_group_size not in VALID_KV_GROUP_SIZE: + raise ValueError( + f"Invalid kv_group_size value. Must be one of {VALID_KV_GROUP_SIZE}" + ) + + return kv_bits, kv_group_size, quantized_kv_start diff --git a/server/backend/mlx_engine/utils/logger.py b/server/backend/mlx_engine/utils/logger.py new file mode 100644 index 0000000..78220d3 --- /dev/null +++ b/server/backend/mlx_engine/utils/logger.py @@ -0,0 +1,42 @@ +""" +Basic logging setup for mlx_engine. + +This module configures standard library logging to output to stderr. +Individual modules should get their own loggers using logging.getLogger(__name__). 
+""" + +import logging +import sys + + +class MLXEngineStreamHandler(logging.StreamHandler): + """Custom StreamHandler that suppresses errors locally instead of globally.""" + + def handleError(self, record): + """Swallow handler-specific exceptions.""" + pass + + +def setup_logging(): + """Setup basic logging configuration for mlx_engine.""" + # Configure root logger for mlx_engine + logger = logging.getLogger("mlx_engine") + logger.setLevel(logging.INFO) + + # Remove any existing handlers + logger.handlers.clear() + + # Create handler that writes to stderr + handler = MLXEngineStreamHandler(sys.stderr) + handler.setLevel(logging.INFO) + + # Simple formatter with logger name and level + formatter = logging.Formatter("[%(module)s][%(levelname)s]: %(message)s") + handler.setFormatter(formatter) + + logger.addHandler(handler) + + # Prevent propagation to root logger + logger.propagate = False + + return logger diff --git a/server/backend/mlx_engine/utils/outlines_transformer_tokenizer.py b/server/backend/mlx_engine/utils/outlines_transformer_tokenizer.py new file mode 100644 index 0000000..396931a --- /dev/null +++ b/server/backend/mlx_engine/utils/outlines_transformer_tokenizer.py @@ -0,0 +1,14 @@ +from outlines.models.transformers import TransformerTokenizer +from ..external.datasets.dill import Hasher + + +class OutlinesTransformerTokenizer(TransformerTokenizer): + """ + Update the outlines TransformerTokenizer to use our own Hasher class, so that we don't need the datasets dependency + + This class and the external dependency can be removed when the following import is deleted + https://github.com/dottxt-ai/outlines/blob/69418d/outlines/models/transformers.py#L117 + """ + + def __hash__(self): + return hash(Hasher.hash(self.tokenizer)) diff --git a/server/backend/mlx_engine/utils/progress_decorators.py b/server/backend/mlx_engine/utils/progress_decorators.py new file mode 100644 index 0000000..f06bc0e --- /dev/null +++ 
b/server/backend/mlx_engine/utils/progress_decorators.py @@ -0,0 +1,100 @@ +from typing import Optional, Callable +from ..cache_wrapper import StopPromptProcessing +import logging + +logger = logging.getLogger(__name__) + + +def ratchet( + callback: Optional[Callable[[float], bool]], +) -> Optional[Callable[[float], bool]]: + """ + Wraps a progress callback to ensure progress values are monotonically increasing. + + This wrapper prevents progress from appearing to move backwards by using a ratchet + mechanism. If a lower percentage is reported than previously seen, the callback + returns True (continue) without calling the original callback. + + Args: + callback: A callback that accepts progress (0.0–100.0) and returns + True to continue or False to stop. May be None. + + Returns: + A wrapped callback that ensures monotonic progress reporting. + If callback is None, returns None. + """ + if callback is None: + return None + + ratchet = float("-inf") + + def inner_callback(percentage: float) -> bool: + nonlocal ratchet + if percentage <= ratchet: + return True + ratchet = percentage + return callback(percentage) + + return inner_callback + + +def throw_to_stop( + callback: Optional[Callable[[float], bool]], +) -> Optional[Callable[[float], bool]]: + """ + Wraps a progress callback to raise an exception when stopping is requested. + + Instead of returning False to indicate stopping, this wrapper raises a + StopPromptProcessing exception when the original callback returns False. + This allows for immediate termination of the generation process. + + Args: + callback: A callback that accepts progress (0.0–100.0) and returns + True to continue or False to stop. May be None. + + Returns: + A wrapped callback that raises StopPromptProcessing when stopping + is requested. If callback is None, returns None. + + Raises: + StopPromptProcessing: When the original callback returns False. 
+ """ + if callback is None: + return None + + def inner_callback(percentage: float) -> bool: + should_continue = callback(percentage) + if not should_continue: + logger.info("Prompt processing was cancelled by the user.") + raise StopPromptProcessing + return should_continue + + return inner_callback + + +def token_count( + callback: Optional[Callable[[float], bool]], +) -> Optional[Callable[[int, int], bool | None]]: + """ + Adapts a float percentage based progress callback into a token count based one. + + Args: + callback: A callback that accepts progress (0.0–100.0) and returns + True to continue or False to stop. May be None. + + Returns: + A token-based callback (processed_tokens, total_tokens) -> bool | None, + as is expected by mlx-lm's stream_generate. + If callback is None, returns None. + """ + if callback is None: + return None + + def inner_callback(processed_tokens: int, total_tokens: int) -> bool | None: + if total_tokens <= 0: + progress = 0.0 + else: + progress = 100 * processed_tokens / total_tokens + return callback(progress) + + return inner_callback diff --git a/server/backend/mlx_engine/utils/prompt_processing.py b/server/backend/mlx_engine/utils/prompt_processing.py new file mode 100644 index 0000000..22795e2 --- /dev/null +++ b/server/backend/mlx_engine/utils/prompt_processing.py @@ -0,0 +1,41 @@ +from typing import Optional, Callable + +from mlx import nn +import mlx.core as mx + +from ..cache_wrapper import CacheWrapper + + +def process_prompt_text_only( + prompt_tokens: mx.array, + cache_wrapper: CacheWrapper, + generate_args: dict, + draft_model: Optional[nn.Module], + speculative_decoding_toggle: Optional[bool], + prompt_progress_callback: Optional[Callable[[float], bool]], +): + if cache_wrapper is None: + raise ValueError("Cache wrapper is not initialized, cannot process prompt") + # Make sure cache's draft model setting aligns with speculative decoding toggle + should_use_draft_model = ( + speculative_decoding_toggle + if 
speculative_decoding_toggle is not None + else draft_model is not None + ) + if should_use_draft_model: + if not draft_model: + raise ValueError( + "Speculative decoding toggle is enabled for prompt processing but no " + "draft model is loaded" + ) + cache_wrapper.set_draft_model(draft_model) + else: + cache_wrapper.unset_draft_model() + + # Check for common tokens with the previous cache and re-use the cache if possible + prompt_tokens = cache_wrapper.update_cache( + prompt_tokens, + prompt_progress_callback, + ) + generate_args["prompt_cache"] = cache_wrapper.cache + return prompt_tokens diff --git a/server/backend/mlx_engine/utils/register_models.py b/server/backend/mlx_engine/utils/register_models.py new file mode 100644 index 0000000..421aed9 --- /dev/null +++ b/server/backend/mlx_engine/utils/register_models.py @@ -0,0 +1,8 @@ +"""Register local model-specific code to bypass enabling `trust_remote_code`.""" + +def register_models(): + """Register local model-specific code. + + Currently empty as Ernie and LFM2-VL models have been pruned. + """ + pass diff --git a/server/backend/mlx_engine/utils/set_seed.py b/server/backend/mlx_engine/utils/set_seed.py new file mode 100644 index 0000000..2e0c6f5 --- /dev/null +++ b/server/backend/mlx_engine/utils/set_seed.py @@ -0,0 +1,50 @@ +import numpy as np +import torch +import mlx.core as mx +import time +from typing import Optional +import random + + +def set_seed(seed: Optional[int]) -> None: + """ + Set the seed for all random number generators used in mlx-engine to ensure reproducible results. + This function synchronizes the random states across multiple libraries including MLX, NumPy, + PyTorch, and Python's built-in random module. + + Args: + seed: The seed value to initialize random number generators. If None, a seed will be + automatically generated using the current nanosecond timestamp. The final seed + value will be truncated to 32 bits for compatibility across all random number + generators. 
+ + Raises: + ValueError: If the provided seed is negative. + + Returns: + None + + Note: + This function affects the following random number generators: + - MLX (mx.random) + - NumPy (np.random) + - PyTorch (torch.manual_seed) + - Python's built-in random module + """ + if seed is None: + # Get nanosecond timestamp and use it as seed + seed = int(time.time_ns()) + + if seed < 0: + raise ValueError("Seed must be a non-negative integer.") + seed = seed & (2**32 - 1) # Ensure seed fits in 32 bits + + # For MLX and MLX_LM + mx.random.seed(seed) + + # MLX_VLM depends on numpy and torch + np.random.seed(seed) + torch.manual_seed(seed) + + # Just in case + random.seed(seed) diff --git a/server/backend/mlx_engine/utils/speculative_decoding.py b/server/backend/mlx_engine/utils/speculative_decoding.py new file mode 100644 index 0000000..fe94dc1 --- /dev/null +++ b/server/backend/mlx_engine/utils/speculative_decoding.py @@ -0,0 +1,50 @@ +from typing import Optional +import mlx.nn as nn +import logging + +from ..model_kit.model_kit import ModelKit + +logger = logging.getLogger(__name__) + + +def determine_draft_model_for_generation( + model_kit: ModelKit, speculative_decoding_toggle: Optional[bool] +) -> Optional[nn.Module]: + """ + Based on ModelKit and speculative_decoding_toggle, determine draft model to use for + generation, or None + """ + if speculative_decoding_toggle is None: + # toggle not set, use draft model if available + return model_kit.draft_model + elif speculative_decoding_toggle and model_kit.draft_model is None: + raise ValueError( + "Speculative decoding toggle is explicitly enabled but no draft model is loaded" + ) + elif not speculative_decoding_toggle and model_kit.draft_model is not None: + logger.info( + "Draft model is loaded but speculative decoding is disabled for this generation" + ) + return None + else: + # toggle set to true, draft model available + return model_kit.draft_model + + +def configure_num_draft_tokens_in_generate_args( + model_kit: 
ModelKit, + draft_model: Optional[nn.Module], + num_draft_tokens: Optional[int], + generate_args: dict, +): + """ + Modifies generate_args in place to include num_draft_tokens if applicable + """ + # Only configure draft tokens when all required conditions are met + should_add_num_draft_tokens_to_args = ( + type(model_kit) is ModelKit + and draft_model is not None + and num_draft_tokens is not None + ) + if should_add_num_draft_tokens_to_args: + generate_args["num_draft_tokens"] = num_draft_tokens diff --git a/server/backend/mlx_engine/utils/token.py b/server/backend/mlx_engine/utils/token.py new file mode 100644 index 0000000..570449c --- /dev/null +++ b/server/backend/mlx_engine/utils/token.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class Token: + """ + Base dataclass for a single generated token. + """ + + id: int + text: str + logprob: float + from_draft: Optional[bool] = None diff --git a/server/backend/mlx_engine/utils/top_logprobs.py b/server/backend/mlx_engine/utils/top_logprobs.py new file mode 100644 index 0000000..0c3baba --- /dev/null +++ b/server/backend/mlx_engine/utils/top_logprobs.py @@ -0,0 +1,28 @@ +from .token import Token + +import mlx.core as mx + + +def summarize_top_logprobs( + tokenizer, logprobs: mx.array, top_logprobs: int +) -> list[Token]: + # find the sorted indices (in descending order) of the logprobs + sorted_indices = mx.argsort(-logprobs) + + # sort the logprobs in descending order + sorted_logprobs = logprobs[..., sorted_indices] + + # slice the top logprobs + top_indices = sorted_indices[:top_logprobs] + top_logprob_values = sorted_logprobs[:top_logprobs] + + # decode the top indices + text_list = [tokenizer.decode(index) for index in top_indices.tolist()] + + # return list of TokenLogprob with id (int), text (str), and logprob (float) + return [ + Token(id=int(idx), text=txt, logprob=float(prob)) + for idx, txt, prob in zip( + top_indices.tolist(), text_list, 
top_logprob_values.tolist() + ) + ] diff --git a/server/backend/mlx_engine/vision_model_kit/_transformers_compatibility.py b/server/backend/mlx_engine/vision_model_kit/_transformers_compatibility.py new file mode 100644 index 0000000..0ddb1a2 --- /dev/null +++ b/server/backend/mlx_engine/vision_model_kit/_transformers_compatibility.py @@ -0,0 +1,60 @@ +import json +from pathlib import Path +import logging + +logger = logging.getLogger(__name__) + + +def fix_qwen2_5_vl_image_processor(model_path: Path): + """ + Update the `image_processor_type` in the preprocessor_config.json file to Qwen2VLImageProcessor + Ref https://huggingface.co/mlx-community/Qwen2.5-VL-7B-Instruct-4bit/commit/fdcc572e8b05ba9daeaf71be8c9e4267c826ff9b + """ + try: + # We are looking for a specific entry, so if any of this throws, we don't need to do anything + with open(model_path / "preprocessor_config.json", "r") as f: + image_processor_type = json.load(f)["image_processor_type"] + with open(model_path / "config.json", "r") as f: + model_type = json.load(f)["model_type"] + except: # noqa: E722 + return + + if not ( + image_processor_type == "Qwen2_5_VLImageProcessor" + and model_type == "qwen2_5_vl" + ): + return + + # Replace image_processor_type with Qwen2VLImageProcessor + logger.warning( + "Replacing `image_processor_type` with Qwen2VLImageProcessor in preprocessor_config.json" + ) + with open(model_path / "preprocessor_config.json", "r") as f: + preprocessor_config = json.load(f) + preprocessor_config["image_processor_type"] = "Qwen2VLImageProcessor" + with open(model_path / "preprocessor_config.json", "w") as f: + json.dump(preprocessor_config, f) + + +def fix_qwen2_vl_preprocessor(model_path: Path): + """ + Remove the `size` entry from the preprocessor_config.json file, which is broken as of transformers v5.0.0rc2 (release candidate) + Ref the transformers implementation: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct/blob/e28f5d3/preprocessor_config.json + """ + try: + # We are 
looking for a specific entry, so if any of this throws, we don't need to do anything + with open(model_path / "config.json", "r") as f: + model_type = json.load(f)["model_type"] + if model_type != "qwen2_vl": + return + with open(model_path / "preprocessor_config.json", "r") as f: + json.load(f)["size"] + except: # noqa: E722 + return + + logger.warning("Removing `size` entry from preprocessor_config.json") + with open(model_path / "preprocessor_config.json", "r") as f: + preprocessor_config = json.load(f) + preprocessor_config.pop("size") + with open(model_path / "preprocessor_config.json", "w") as f: + json.dump(preprocessor_config, f) diff --git a/server/backend/mlx_engine/vision_model_kit/vision_model_kit.py b/server/backend/mlx_engine/vision_model_kit/vision_model_kit.py new file mode 100644 index 0000000..0e4bf5a --- /dev/null +++ b/server/backend/mlx_engine/vision_model_kit/vision_model_kit.py @@ -0,0 +1,160 @@ +from typing import Union, Optional, List, Tuple +from ..model_kit.model_kit import ModelKit +import logging + +from ._transformers_compatibility import ( + fix_qwen2_5_vl_image_processor, + fix_qwen2_vl_preprocessor, +) +from .vision_model_wrapper import VisionModelWrapper +import mlx_vlm +import mlx_lm +from pathlib import Path +import mlx.core as mx +from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast + +logger = logging.getLogger(__name__) + + +class VisionModelKit(ModelKit): + """ + Collection of objects and methods that are needed for operating a vision model + """ + + config: dict = None + trust_remote_code: bool = False + model_path: Path = None + vocab_only: bool = False + model_weights = None + + processor: Union[PreTrainedTokenizer, PreTrainedTokenizerFast] = None + has_processed_prompt: bool = False + + def __init__( + self, + model_path: Path, + vocab_only: bool, + trust_remote_code: bool, + ): + fix_qwen2_5_vl_image_processor(model_path) + fix_qwen2_vl_preprocessor(model_path) + self.config = mlx_vlm.utils.load_config( 
+ model_path, trust_remote_code=trust_remote_code + ) + self.trust_remote_code = trust_remote_code + self.vocab_only = vocab_only + self.model_path = model_path + self._initializer() + + def _vocab_only_init(self): + self.tokenizer = mlx_vlm.tokenizer_utils.load_tokenizer(self.model_path) + self.detokenizer = self.tokenizer.detokenizer + + def _full_model_init(self): + additional_kwargs = {} + if self.model_weights: + additional_kwargs["weights"] = self.model_weights + return_tuple = mlx_vlm.utils.load( + self.model_path, + processor_config={"trust_remote_code": self.trust_remote_code}, + trust_remote_code=self.trust_remote_code, + **additional_kwargs, + ) + if len(return_tuple) == 2: + self.model, self.processor = return_tuple + else: + self.model, self.processor, self.model_weights = return_tuple + self.model = VisionModelWrapper(self.model) + + # Set the eos_token_ids (check root level first, then text_config) + eos_token_ids_raw = self.config.get("eos_token_id") + if eos_token_ids_raw is None: + eos_token_ids_raw = self.config.get("text_config", {}).get("eos_token_id") + eos_token_ids = None + if eos_token_ids_raw is not None: + eos_token_ids = ( + [eos_token_ids_raw] + if isinstance(eos_token_ids_raw, int) + else list(dict.fromkeys(eos_token_ids_raw)) + ) + logger.info(f"Setting eos token ids: {eos_token_ids}") + + # Use the mlx_lm tokenizer since it's more robust + self.tokenizer = mlx_lm.tokenizer_utils.load( + self.model_path, eos_token_ids=eos_token_ids + ) + self.detokenizer = self.tokenizer.detokenizer + + self.cache_wrapper = None + mx.clear_cache() + + def _initializer(self): + if self.vocab_only: + self._vocab_only_init() + else: + self._full_model_init() + + def _reset_for_prediction(self): + # It's a shortcoming that the only way to reset the model for prediction + # is to reload it. 
Worth investigating how to make resetting faster + self._full_model_init() + + def process_prompt( + self, + prompt_tokens, + images_b64: Optional[List[str]], + prompt_progress_callback, + generate_args, + max_image_size: tuple[int, int] | None, + speculative_decoding_toggle: Optional[bool] = None, + ) -> Tuple[mx.array, Optional[mx.array]]: + """ + Call this before starting evaluation + + This method opens the image from the base64-encoded string, and adds the special image token to the prompt + + Returns the processed prompt tokens to be input to the `generate_step` function, and optionally input + embeddings. For VisionModelKit, the input embeddings are always none. + """ + if self.has_processed_prompt: + self._reset_for_prediction() + + self.model.process_prompt_with_images( + images_b64, prompt_tokens, self.processor, self.detokenizer, max_image_size + ) + self.has_processed_prompt = True + + # The VLM input_ids shape is important, but mlx_lm expects a flattened array + # Send back a fake shape and input_ids, and save the real shape in `self.model.input_ids` + if images_b64 is None or len(images_b64) == 0: + # For text-only, enable mlx-lm prompt processing + return self.model.input_ids.reshape(-1), None + # Disable mlx-lm prompt processing by returning a fake input + return mx.array([0]), mx.array([0]) + + def is_cross_prompt_cache_active(self) -> bool: + """VisionModelKit does not support cross prompt caching""" + return False + + def record_token_to_cache(self, token: int) -> None: + pass + + def record_sampled_token(self, token: int) -> None: + self.model.record_sampled_token(token) + + def is_draft_model_compatible(self, path: str | Path) -> bool: + return False + + def load_draft_model(self, path: str | Path) -> None: + raise ValueError( + "Speculative decoding is not currently supported for vision models" + ) + + def unload_draft_model(self) -> None: + raise ValueError( + "Speculative decoding is not currently supported for vision models" + ) + + 
@property + def language_model(self): + return self.model.language_model diff --git a/server/backend/mlx_engine/vision_model_kit/vision_model_wrapper.py b/server/backend/mlx_engine/vision_model_kit/vision_model_wrapper.py new file mode 100644 index 0000000..3583329 --- /dev/null +++ b/server/backend/mlx_engine/vision_model_kit/vision_model_wrapper.py @@ -0,0 +1,214 @@ +import mlx.core as mx +import logging + +from mlx_vlm.models.cache import KVCache, SimpleKVCache +from typing import List, Optional +from ..model_kit.vision_add_ons.process_prompt_with_images import ( + common_process_prompt_with_images, +) + +logger = logging.getLogger(__name__) + + +class VisionModelWrapper: + """ + Wrapper class for Vision Models support + This wrapper class adapts mlx-vlm models so that they can be slotted into the mlx_lm generation engine + This wrapper defines `__getattr__` and `__setattr__` to allow the model properties to be set/get as if it were a text model + + Models are evaluated in `mlx_lm` with the `__call__` method, so define a custom `__call__` method to forward calls to the vision model + """ + + def __init__(self, model): + """ + Set the class members in this unusual way, so that we can define `__getattr__` and `__setattr__` + """ + self.__dict__["_model_attrs"] = { + "vision_model": model, + "input_ids": None, + "pixel_values": None, + "mask": None, + "first_call": False, + "decoder_input_ids": None, + "language_model_kwargs": {}, + # vision model kwargs + "model_inputs": {}, + } + + def __getattr__(self, name): + """ + First, check if the name is a member of this class + Then, check if the name is a member of the language model + Finally, check if the name is a member of the vision model + """ + if name in self._model_attrs: + return self._model_attrs[name] + try: + return getattr(self.vision_model.language_model, name) + except AttributeError: + pass + return getattr(self.vision_model, name) + + def __setattr__(self, name, value): + """ + Set attribute of this 
class if it's not a member of the vision model + """ + if name in self._model_attrs or not hasattr(self.vision_model, name): + self._model_attrs[name] = value + else: + setattr(self.vision_model, name, value) + + def __call__(self, *args, input_embeddings=None, **kwargs): + """ + See this reference implementation + https://github.com/Blaizzy/mlx-vlm/blob/6c98971/mlx_vlm/utils.py#L783-L810 + + In the reference implementation, the vision model is called once at the beginning, + then all subsequent calls are forwarded to the language model. Mirror that behavior here. + """ + if self.pixel_values is not None and not self.first_call: + self.first_call = True + + # taken from here https://github.com/Blaizzy/mlx-vlm/blob/2974401/mlx_vlm/utils.py#L987 + if hasattr(self.language_model, "make_cache"): + cache = self.language_model.make_cache() + else: + kv_heads = ( + [self.language_model.n_kv_heads] * len(self.language_model.layers) + if isinstance(self.language_model.n_kv_heads, int) + else self.language_model.n_kv_heads + ) + if self.vision_model.config.model_type == "florence2": + cache = [ + (SimpleKVCache(), SimpleKVCache()) + for n in self.language_model.layers + ] + else: + cache = [KVCache() for n in kv_heads] + + # Replace the mlx_lm cache with the one we created + kwargs["cache"] = cache + + outputs = self.vision_model( + self.input_ids, + self.pixel_values, + mask=self.mask, + **self.model_inputs, + **kwargs, + ) + + # taken from here https://github.com/Blaizzy/mlx-vlm/blob/2974401/mlx_vlm/utils.py#L1045-L1056 + if outputs.cross_attention_states is not None: + self.language_model_kwargs = { + k: v + for k, v in zip( + ["cross_attention_states"], [outputs.cross_attention_states] + ) + } + elif outputs.encoder_outputs is not None: + self.decoder_input_ids = self.input_ids + self.language_model_kwargs = { + "decoder_input_ids": self.decoder_input_ids, + "encoder_outputs": outputs.encoder_outputs, + } + + # Add the cache we created here to the language model kwargs + 
self.language_model_kwargs["cache"] = cache + else: + try: + if ( + "cache" in self.language_model_kwargs + ): # This is only missing if self.pixel_values is None + del kwargs["cache"] # Use the cache from self.language_model_kwargs + + # taken from here https://github.com/Blaizzy/mlx-vlm/blob/2974401/mlx_vlm/utils.py#L1009 + if "decoder_input_ids" in self.language_model_kwargs: + self.language_model_kwargs["decoder_input_ids"] = ( + self.decoder_input_ids + ) + outputs = self.language_model( + **kwargs, + **self.language_model_kwargs, + ) + else: + outputs = self.language_model( + *args, + **kwargs, + **self.language_model_kwargs, + ) + + except ValueError as e: + # Create a friendly error message if a user tries to use mllama without images + if "Cross attention states must be provided for layer" in str(e): + raise ValueError( + "Using this model without any images attached is not supported yet." + ) + raise e + + return outputs.logits + + def record_sampled_token(self, token: int) -> None: + # Adapted from here https://github.com/Blaizzy/mlx-vlm/blob/2974401/mlx_vlm/utils.py#L1064 + self.decoder_input_ids = mx.array([token]) + + def process_prompt_with_images( + self, + images_b64: Optional[List[str]], + prompt_tokens: mx.array, + processor, + detokenizer, + max_image_size: tuple[int, int] | None, + ): + """ + This method generates the input_ids, pixel_values, and mask for the vision model + Call this before starting evaluation + """ + if images_b64 is None: + images_b64 = [] + + # Handle the case with no images + if len(images_b64) == 0: + detokenizer.reset() + [detokenizer.add_token(token) for token in prompt_tokens] + detokenizer.finalize() + prompt = detokenizer.text + + logger.debug(f"Prompt dump: {prompt}\n") + + try: + if hasattr(processor, "process"): + # Needed for Molmo + self.input_ids = mx.array( + processor.process(text=prompt)["input_ids"] + ) + else: + self.input_ids = mx.array(processor(text=prompt).input_ids) + except ValueError as e: + if 
"`images` are expected as arguments" in str(e): + raise ValueError( + "Using this model without any images attached is not supported yet." + ) + raise e + else: + # Use the common function for image processing + processed = common_process_prompt_with_images( + prompt_tokens=prompt_tokens, + images_b64=images_b64, + processor=processor, + config=self.vision_model.config, + max_size=max_image_size, + ) + + # Set class attributes from the processed result + self.input_ids = processed.input_ids + self.pixel_values = processed.pixel_values + self.mask = processed.attention_mask + self.model_inputs = processed.other_inputs + + @property + def vision_model(self): + return self._model_attrs["vision_model"] + + @property + def language_model(self): + return self.vision_model.language_model diff --git a/server/backend/mlx_runner.py b/server/backend/mlx_runner.py index 967ab5c..b061408 100644 --- a/server/backend/mlx_runner.py +++ b/server/backend/mlx_runner.py @@ -1,43 +1,39 @@ """ -Enhanced MLX model runner with direct API integration. +Enhanced MLX model runner using the vendored mlx_engine. Provides ollama-like run experience with streaming and interactive chat. 
""" -import sys +import gc import json +import logging import os +import psutil import time from collections.abc import Iterator from pathlib import Path -from typing import Dict, Optional +from typing import Any, Dict, List, Optional -if sys.platform == "darwin": - import mlx.core as mx -else: - mx = None -from mlx_lm import load -from mlx_lm.generate import generate_step -from mlx_lm.sample_utils import make_repetition_penalty, make_sampler +import mlx.core as mx -from ..reasoning_utils import ReasoningExtractor, StreamingReasoningParser +from .mlx_engine import create_generator as engine_create_generator +from .mlx_engine import load_model as engine_load_model +from .mlx_engine import tokenize as engine_tokenize +from .mlx_engine.cache_wrapper import StopPromptProcessing +from ..reasoning_utils import ReasoningExtractor, StreamingReasoningParser -def get_model_context_length(model_path: str) -> int: - """Extract max_position_embeddings from model config. +# Initialize logger +logger = logging.getLogger(__name__) - Args: - model_path: Path to the MLX model directory - Returns: - Maximum context length for the model (defaults to 4096 if not found) - """ +def get_model_context_length(model_path: str) -> int: + """Extract max_position_embeddings from model config.""" config_path = os.path.join(model_path, "config.json") try: with open(config_path) as f: config = json.load(f) - # Try various common config keys for context length context_keys = [ "max_position_embeddings", "n_positions", @@ -50,88 +46,62 @@ def get_model_context_length(model_path: str) -> int: if key in config: return config[key] - # If no context length found, return reasonable default return 4096 except (FileNotFoundError, json.JSONDecodeError, KeyError): - # Return default if config can't be read return 4096 class MLXRunner: - """Direct MLX model runner with streaming and interactive capabilities.""" + """Direct MLX model runner using mlx_engine.""" def __init__( self, model_path: str, 
adapter_path: Optional[str] = None, verbose: bool = False ): - """Initialize the runner with a model. - - Args: - model_path: Path to the MLX model directory - adapter_path: Optional path to LoRA adapter - verbose: Show detailed output - """ self.model_path = Path(model_path) self.adapter_path = adapter_path - self.model = None - self.tokenizer = None + self.model_kit = None self._memory_baseline = None - self._stop_tokens = None # Will be populated from tokenizer - self._message_end_tokens = None # Message-end tokens (e.g., <|end|> for MXFP4) - self._chat_stop_tokens = None # Chat-specific stop tokens - self._context_length = None # Will be populated from model config - self._is_reasoning_model = False # Whether model uses reasoning (MXFP4) - self._reasoning_start = None # Reasoning start marker - self._reasoning_end = None # Reasoning end marker - self._final_start = None # Final answer start marker + self._context_length = None self.verbose = verbose self._model_loaded = False - self._context_entered = False # Prevent nested context usage + self._context_entered = False def __enter__(self): - """Context manager entry - loads the model.""" if self._context_entered: - raise RuntimeError( - "MLXRunner context manager cannot be entered multiple times" - ) - + raise RuntimeError("MLXRunner context manager cannot be entered multiple times") self._context_entered = True try: self.load_model() return self except Exception: - # If load_model fails, ensure cleanup happens self._context_entered = False self.cleanup() raise def __exit__(self, exc_type, exc_val, exc_tb): - """Context manager exit - cleans up the model.""" self._context_entered = False self.cleanup() - return False # Don't suppress exceptions + return False def load_model(self): - """Load the MLX model and tokenizer.""" + """Load the model via mlx_engine.""" if self._model_loaded: - if self.verbose: - print("Model already loaded, skipping...") return if self.verbose: print(f"Loading model from 
{self.model_path}...") start_time = time.time() - # Capture baseline memory before loading try: mx.clear_cache() except Exception: - pass # Continue even if cache clear fails + pass self._memory_baseline = mx.get_active_memory() / 1024**3 try: - # Load model and tokenizer - self.model, self.tokenizer = load( + # mlx_engine.load_model returns a ModelKit or VisionModelKit + self.model_kit = engine_load_model( str(self.model_path), adapter_path=self.adapter_path ) @@ -141,274 +111,61 @@ def load_model(self): if self.verbose: print(f"Model loaded in {load_time:.1f}s") - print( - f"Memory: {model_memory:.1f}GB model, {current_memory:.1f}GB total" - ) - - # Extract stop tokens from tokenizer - self._extract_stop_tokens() + print(f"Memory: {model_memory:.1f}GB model, {current_memory:.1f}GB total") - # Extract context length from model config self._context_length = get_model_context_length(str(self.model_path)) - - if self.verbose: - print(f"Model context length: {self._context_length} tokens") - self._model_loaded = True except Exception as e: - # Ensure partial state is cleaned up on failure - self.model = None - self.tokenizer = None - self._stop_tokens = None + self.model_kit = None self._model_loaded = False - # Clear any memory that might have been allocated mx.clear_cache() - raise RuntimeError( - f"Failed to load model from {self.model_path}: {e}" - ) from e - - def _extract_stop_tokens(self): - """Extract stop tokens from the tokenizer dynamically. - - This method identifies ALL tokens that should stop generation: - 1. Official EOS token from tokenizer config - 2. Message-end tokens from training (e.g., <|end|> for MXFP4) - 3. 
Common stop tokens across models - """ - self._stop_tokens = set() - self._message_end_tokens = ( - set() - ) # Tokens that end messages but not conversations - - # Primary source: eos_token - eos_token = getattr(self.tokenizer, "eos_token", None) - if eos_token: - self._stop_tokens.add(eos_token) - - # Also check pad_token if it's different from eos_token - pad_token = getattr(self.tokenizer, "pad_token", None) - if pad_token and pad_token != eos_token: - self._stop_tokens.add(pad_token) - - # Check additional_special_tokens - if hasattr(self.tokenizer, "additional_special_tokens"): - for token in self.tokenizer.additional_special_tokens: - if token and isinstance(token, str): - # Only add tokens that look like stop/end tokens - if any( - keyword in token.lower() for keyword in ["end", "stop", "eot"] - ): - self._stop_tokens.add(token) - - # MLX-LM 0.27.0+: Extract tokens from added_tokens_decoder (comprehensive source) - if hasattr(self.tokenizer, "added_tokens_decoder"): - for _token_id, token_info in self.tokenizer.added_tokens_decoder.items(): - if isinstance(token_info, dict) and "content" in token_info: - token_content = token_info["content"] - if token_content and isinstance(token_content, str): - token_lower = token_content.lower() - - # NOTE: <|end|> is NOT a stop token for MXFP4 models! 
- # It's a separator between reasoning and final answer - if token_content == "<|end|>": - self._message_end_tokens.add(token_content) - # Do NOT add as stop token - let model continue to final answer - - # Look for tokens that could be end/stop tokens - # Expanded patterns for MLX-LM 0.27.0 token varieties - # EXCLUDE <|end|> for MXFP4 models as it's a reasoning separator - end_patterns = [ - "stop", - "eot", - "return", - "finish", - "done", - "im_end", - ] - if any(pattern in token_lower for pattern in end_patterns): - # Decide if it's a message-end or conversation-end token - if "im_end" in token_lower: - self._message_end_tokens.add(token_content) - self._stop_tokens.add(token_content) - # Special handling for 'end' pattern - more selective - elif "end" in token_lower and token_content != "<|end|>": - # Only add non-<|end|> tokens with 'end' in them - self._stop_tokens.add(token_content) - - # Special case: control tokens in |..| format - elif token_content.startswith("<|") and token_content.endswith( - "|>" - ): - # Be inclusive with control tokens that might stop generation - if any( - pattern in token_lower - for pattern in ["end", "return", "stop", "finish"] - ): - self._stop_tokens.add(token_content) - - # Model-specific handling based on known patterns - # Use reasoning_utils for reasoning model detection and patterns - from ..reasoning_utils import ReasoningExtractor - - if hasattr(self.tokenizer, "name_or_path"): - name_or_path = str(getattr(self.tokenizer, "name_or_path", "")).lower() - model_type = ReasoningExtractor.detect_model_type(name_or_path) - - if model_type: - # This is a reasoning model - self._is_reasoning_model = True - - # Get patterns from reasoning_utils - if model_type in ReasoningExtractor.PATTERNS: - markers = ReasoningExtractor.PATTERNS[model_type]["markers"] - self._reasoning_start = markers.get("reasoning_start") - self._reasoning_end = markers.get("reasoning_end") - self._final_start = markers.get("final_marker") - - # For 
reasoning models, remove reasoning_end from stop tokens - if self._reasoning_end: - self._stop_tokens.discard(self._reasoning_end) - - # Add proper stop token for this model type - if model_type == "gpt-oss": - if "<|return|>" not in self._stop_tokens: - self._stop_tokens.add("<|return|>") - else: - self._is_reasoning_model = False - else: - self._is_reasoning_model = False - - # Add common stop tokens that might not be in special tokens - common_stop_tokens = {"", "<|endoftext|>", "<|im_end|>", "<|eot_id|>"} - - # Add chat-specific stop tokens to prevent model self-conversations - # Based on our _format_conversation() format: "Human:" and "Assistant:" - # Also include "You:" as models might use UI-visible format - # Include single-letter variations (H:, A:, Y:) that some models use - chat_stop_tokens = { - "\nHuman:", - "\nAssistant:", - "\nYou:", - "\n\nHuman:", - "\n\nAssistant:", - "\n\nYou:", - "\nH:", - "\nA:", - "\nY:", # Single-letter variations - "\n\nH:", - "\n\nA:", - "\n\nY:", - } - - # Add common stop tokens only if they decode to themselves (i.e., they're single tokens) - for token in common_stop_tokens: - try: - # Try to encode and decode to verify it's a real single token - ids = self.tokenizer.encode(token, add_special_tokens=False) - if ids and len(ids) == 1: # Single token ID means it's a special token - decoded = self.tokenizer.decode(ids) - if decoded == token: - self._stop_tokens.add(token) - except: - pass - - # Store chat stop tokens separately - only used in interactive chat mode - # This prevents stopping mid-story when user asks for dialogues - self._chat_stop_tokens = list(chat_stop_tokens) - - # Remove any None values - self._stop_tokens.discard(None) - self._message_end_tokens.discard(None) - - # Convert to list for easier use - self._stop_tokens = list(self._stop_tokens) - self._message_end_tokens = list(self._message_end_tokens) - - if self.verbose: - if self._stop_tokens: - print(f"Stop tokens: {self._stop_tokens}") - if 
self._message_end_tokens: - print(f"Message end tokens: {self._message_end_tokens}") + raise RuntimeError(f"Failed to load model from {self.model_path}: {e}") from e def cleanup(self): - """Clean up model resources and clear GPU memory. - - This method is safe to call multiple times and handles partial state cleanup. - """ + """Clean up model resources.""" if self.verbose and self._model_loaded: - memory_before = mx.get_active_memory() / 1024**3 - print(f"Cleaning up model (memory before: {memory_before:.1f}GB)...") - - # Always clean up, even if model wasn't fully loaded - self.model = None - self.tokenizer = None - self._stop_tokens = None - self._message_end_tokens = None - self._chat_stop_tokens = None - self._context_length = None - self._is_reasoning_model = False - self._reasoning_start = None - self._reasoning_end = None - self._final_start = None - self._model_loaded = False + print("Cleaning up model...") - # Force garbage collection and clear MLX cache - import gc + self.model_kit = None + self._model_loaded = False gc.collect() try: mx.clear_cache() except Exception: - pass # Continue cleanup even if cache clear fails - - if self.verbose: - memory_after = mx.get_active_memory() / 1024**3 - if "memory_before" in locals(): - memory_freed = memory_before - memory_after - print( - f"Cleanup complete (memory after: {memory_after:.1f}GB, freed: {memory_freed:.1f}GB)" - ) - else: - print(f"Cleanup complete (memory after: {memory_after:.1f}GB)") + pass def get_effective_max_tokens( self, requested_tokens: Optional[int], interactive: bool = False ) -> int: - """Get effective max tokens based on model context and usage mode. 
- - Args: - requested_tokens: The requested max tokens (None if user didn't specify --max-tokens) - interactive: True if this is interactive mode (gets full context length) - - Returns: - Effective max tokens to use - """ if not self._context_length: - # Fallback when context length is unknown - fallback = 4096 if interactive else 2048 - if self.verbose: - if requested_tokens is None: - print( - f"[WARNING] Model context length unknown, using fallback: {fallback} tokens" - ) - else: - print( - f"[WARNING] Model context length unknown, using user specified: {requested_tokens} tokens" - ) - return requested_tokens if requested_tokens is not None else fallback - - if interactive: - if requested_tokens is None: - # User didn't specify --max-tokens: use full model context - return self._context_length - else: - # User specified --max-tokens explicitly: respect their choice but cap at context - return min(requested_tokens, self._context_length) - else: - # Server/batch mode uses half context length for DoS protection - server_limit = self._context_length // 2 - return min(requested_tokens or server_limit, server_limit) + return requested_tokens or (4096 if interactive else 2048) + + # Interactive mode may use the full model context; server/batch mode caps at half for DoS protection + limit = self._context_length if interactive else self._context_length // 2 + return min(requested_tokens or limit, limit) + + def _get_chat_stop_tokens(self) -> List[str]: + """Get chat stop tokens from tokenizer.""" + stop_tokens = [] + if not self.model_kit: + return stop_tokens + + tokenizer = self.model_kit.tokenizer + if hasattr(tokenizer, "eos_token") and tokenizer.eos_token: + stop_tokens.append(tokenizer.eos_token) + + # Common chat tokens if not present in eos_token_ids + for token in ["<|end|>", "<|im_end|>", "</s>", "<|eot_id|>"]: + if token not in stop_tokens: + # Basic check if token exists in vocab + try: + if tokenizer.encode(token, add_special_tokens=False): + stop_tokens.append(token) + except 
Exception: + pass + return stop_tokens def generate_streaming( self, @@ -422,325 +179,105 @@ def generate_streaming( use_chat_stop_tokens: bool = False, interactive: bool = False, hide_reasoning: bool = False, + json_schema: Optional[str] = None, ) -> Iterator[str]: - """Generate text with streaming output. - - Args: - prompt: Input prompt - max_tokens: Maximum tokens to generate - temperature: Sampling temperature - top_p: Top-p sampling parameter - repetition_penalty: Penalty for repeated tokens - repetition_context_size: Context size for repetition penalty - use_chat_template: Apply tokenizer's chat template if available - use_chat_stop_tokens: Include chat turn markers as stop tokens (for interactive mode) - interactive: True if this is interactive mode (affects token limits) - - Yields: - Generated tokens as they are produced - """ - if not self.model or not self.tokenizer: + """Generate text using mlx_engine's generator.""" + if not self.model_kit: raise RuntimeError("Model not loaded. 
Call load_model() first.") - # Initialize reasoning parser if this is a reasoning model - reasoning_parser = None - if self._is_reasoning_model: - model_type = ReasoningExtractor.detect_model_type( - getattr(self.tokenizer, "name_or_path", "") or "" - ) - reasoning_parser = StreamingReasoningParser( - model_type, hide_reasoning=hide_reasoning - ) - - # Apply context-aware token limits - effective_max_tokens = self.get_effective_max_tokens(max_tokens, interactive) - # Apply chat template if available and requested if ( use_chat_template - and hasattr(self.tokenizer, "chat_template") - and self.tokenizer.chat_template + and hasattr(self.model_kit.tokenizer, "chat_template") + and self.model_kit.tokenizer.chat_template ): messages = [{"role": "user", "content": prompt}] - formatted_prompt = self.tokenizer.apply_chat_template( + formatted_prompt = self.model_kit.tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) else: formatted_prompt = prompt - # Tokenize the prompt - prompt_tokens = self.tokenizer.encode(formatted_prompt) - prompt_array = mx.array(prompt_tokens) - - # Track generation metrics - start_time = time.time() - tokens_generated = 0 - - # Create sampler with our parameters - sampler = make_sampler(temp=temperature, top_p=top_p) + prompt_tokens = engine_tokenize(self.model_kit, formatted_prompt) + effective_max_tokens = self.get_effective_max_tokens(max_tokens, interactive) - # Create repetition penalty processor if needed - logits_processors = [] - if repetition_penalty > 1.0: - logits_processors.append( - make_repetition_penalty(repetition_penalty, repetition_context_size) - ) + # Handle stop strings/tokens + stop_strings = [] + if use_chat_stop_tokens: + stop_strings.extend(self._get_chat_stop_tokens()) - # Generate tokens one by one for streaming - generator = generate_step( - prompt=prompt_array, - model=self.model, + # Initialize reasoning parser + reasoning_parser = None + model_name = 
getattr(self.model_kit.tokenizer, "name_or_path", "") or "" + model_type = ReasoningExtractor.detect_model_type(str(model_name).lower()) + if model_type: + reasoning_parser = StreamingReasoningParser(model_type, hide_reasoning=hide_reasoning) + + generator = engine_create_generator( + self.model_kit, + prompt_tokens, max_tokens=effective_max_tokens, - sampler=sampler, - logits_processors=logits_processors if logits_processors else None, + temp=temperature, + top_p=top_p, + repetition_penalty=repetition_penalty, + repetition_context_size=repetition_context_size, + json_schema=json_schema, + stop_strings=stop_strings if stop_strings else None, ) - # Collect tokens and yield text - generated_tokens = [] - previous_decoded = "" - accumulated_response = "" # Track full response for stop token detection - - # Keep a sliding window of recent tokens for context - context_window = 10 # Decode last N tokens for proper spacing - - for token, _ in generator: - # Token might be an array or an int - token_id = token.item() if hasattr(token, "item") else token - generated_tokens.append(token_id) - - # Use a sliding window approach for efficiency - start_idx = max(0, len(generated_tokens) - context_window) - window_tokens = generated_tokens[start_idx:] - - # Decode the window - window_text = self.tokenizer.decode(window_tokens) - - # Figure out what's new - if start_idx == 0: - # We're still within the context window - if window_text.startswith(previous_decoded): - new_text = window_text[len(previous_decoded) :] - else: - new_text = self.tokenizer.decode([token_id]) - previous_decoded = window_text - else: - # We're beyond the context window, just decode the last token with context - # This is approximate but should preserve spaces - new_text = self.tokenizer.decode(window_tokens) - if len(window_tokens) > 1: - prefix = self.tokenizer.decode(window_tokens[:-1]) - if new_text.startswith(prefix): - new_text = new_text[len(prefix) :] - else: - new_text = 
self.tokenizer.decode([token_id]) - - if new_text: - # Update accumulated response for stop token checking - accumulated_response += new_text - - # Filter out stop tokens with priority: native first, then chat fallback - # Check native stop tokens FIRST in accumulated response (highest priority) - native_stop_tokens = self._stop_tokens if self._stop_tokens else [] - for stop_token in native_stop_tokens: - if stop_token in accumulated_response: - # Find the stop token position and yield everything before it - stop_pos = accumulated_response.find(stop_token) - # Calculate what text came before the stop token - text_before_stop = accumulated_response[:stop_pos] - # Calculate how much of that is new (not previously yielded) - previously_yielded_length = len(accumulated_response) - len( - new_text - ) - if len(text_before_stop) > previously_yielded_length: - # Yield only the new part before stop token - new_part_before_stop = text_before_stop[ - previously_yielded_length: - ] - if new_part_before_stop: - if reasoning_parser: - # Process through reasoning parser for formatting - for ( - formatted_token - ) in reasoning_parser.process_token( - new_part_before_stop - ): - yield formatted_token - else: - yield new_part_before_stop - return # Stop generation without yielding stop token - - # Only check chat stop tokens if no native stop token found (fallback) - if use_chat_stop_tokens and self._chat_stop_tokens: - for stop_token in self._chat_stop_tokens: - if stop_token in accumulated_response: - # Find the stop token position and yield everything before it - stop_pos = accumulated_response.find(stop_token) - # Calculate what text came before the stop token - text_before_stop = accumulated_response[:stop_pos] - # Calculate how much of that is new (not previously yielded) - previously_yielded_length = len(accumulated_response) - len( - new_text - ) - if len(text_before_stop) > previously_yielded_length: - # Yield only the new part before stop token - new_part_before_stop = 
text_before_stop[ - previously_yielded_length: - ] - if new_part_before_stop: - if reasoning_parser: - # Process through reasoning parser for formatting - for ( - formatted_token - ) in reasoning_parser.process_token( - new_part_before_stop - ): - yield formatted_token - else: - yield new_part_before_stop - return # Stop generation without yielding stop token - - # No stop token found, process the new text + for result in generator: + if result.text: if reasoning_parser: - # Process through reasoning parser for formatting - for formatted_token in reasoning_parser.process_token(new_text): + for formatted_token in reasoning_parser.process_token(result.text): yield formatted_token else: - # Normal streaming for non-reasoning models - yield new_text - tokens_generated += 1 - - # Check for EOS token - don't yield it - if token_id == self.tokenizer.eos_token_id: + yield result.text + + if result.stop_condition: break - # Finalize reasoning parser if used if reasoning_parser: yield from reasoning_parser.finalize() - # Print generation statistics if verbose - if self.verbose: - generation_time = time.time() - start_time - tokens_per_second = ( - tokens_generated / generation_time if generation_time > 0 else 0 - ) - print( - f"\n\nGenerated {tokens_generated} tokens in {generation_time:.1f}s ({tokens_per_second:.1f} tokens/s)" - ) - - def generate_batch( - self, - prompt: str, - max_tokens: int = 500, - temperature: float = 0.7, - top_p: float = 0.9, - repetition_penalty: float = 1.1, - repetition_context_size: int = 20, - use_chat_template: bool = True, - interactive: bool = False, - ) -> str: - """Generate text in batch mode (non-streaming). 
- - Args: - prompt: Input prompt - max_tokens: Maximum tokens to generate - temperature: Sampling temperature - top_p: Top-p sampling parameter - repetition_penalty: Penalty for repeated tokens - repetition_context_size: Context size for repetition penalty - use_chat_template: Apply tokenizer's chat template if available - interactive: True if this is interactive mode (affects token limits) - - Returns: - Generated text - """ - if not self.model or not self.tokenizer: - raise RuntimeError("Model not loaded. Call load_model() first.") - - # Apply context-aware token limits - effective_max_tokens = self.get_effective_max_tokens(max_tokens, interactive) - - # Apply chat template if available and requested - if ( - use_chat_template - and hasattr(self.tokenizer, "chat_template") - and self.tokenizer.chat_template - ): - messages = [{"role": "user", "content": prompt}] - formatted_prompt = self.tokenizer.apply_chat_template( + def generate_batch(self, *args, **kwargs) -> str: + """Simple wrapper for generate_streaming to collect all tokens.""" + return "".join(self.generate_streaming(*args, **kwargs)) + + def _format_conversation(self, messages: List[Dict[str, str]], use_chat_template: bool = True) -> str: + """Format conversation using tokenizer's template.""" + if not self.model_kit: + raise RuntimeError("Model needed for formatting") + + if use_chat_template and hasattr(self.model_kit.tokenizer, "chat_template") and self.model_kit.tokenizer.chat_template: + return self.model_kit.tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) - else: - formatted_prompt = prompt - - start_time = time.time() - - # Tokenize the prompt - prompt_tokens = self.tokenizer.encode(formatted_prompt) - prompt_array = mx.array(prompt_tokens) - - # Create sampler with our parameters - sampler = make_sampler(temp=temperature, top_p=top_p) - - # Create repetition penalty processor if needed - logits_processors = [] - if repetition_penalty > 1.0: - 
logits_processors.append( - make_repetition_penalty(repetition_penalty, repetition_context_size) - ) - - # Generate all tokens at once - generated_tokens = [] - all_tokens = list(prompt_tokens) # Keep prompt for proper decoding - - generator = generate_step( - prompt=prompt_array, - model=self.model, - max_tokens=effective_max_tokens, - sampler=sampler, - logits_processors=logits_processors if logits_processors else None, - ) - - for token, _ in generator: - # Token might be an array or an int - token_id = token.item() if hasattr(token, "item") else token - generated_tokens.append(token_id) - all_tokens.append(token_id) - - # Check for EOS token - don't yield it - if token_id == self.tokenizer.eos_token_id: - break - - # Decode all tokens together for proper spacing - full_response = self.tokenizer.decode(all_tokens) - - # Remove the prompt part - if full_response.startswith(formatted_prompt): - response = full_response[len(formatted_prompt) :] - else: - # Fallback: just decode generated tokens - response = self.tokenizer.decode(generated_tokens) - - # Apply end-token filtering (same logic as streaming mode for Issue #20) - response = self._filter_end_tokens_from_response( - response, use_chat_stop_tokens=False - ) - - # Format reasoning models output - response = self._format_reasoning_response(response) + + # Fallback manual formatting + formatted = "" + for msg in messages: + role = msg["role"].capitalize() + content = msg["content"] + formatted += f"{role}: {content}\n" + formatted += "Assistant: " + return formatted - generation_time = time.time() - start_time - - # Count tokens for statistics - if self.verbose: - tokens_generated = len(generated_tokens) - tokens_per_second = ( - tokens_generated / generation_time if generation_time > 0 else 0 - ) - print( - f"\nGenerated {tokens_generated} tokens in {generation_time:.1f}s ({tokens_per_second:.1f} tokens/s)" - ) + def get_memory_usage(self) -> Dict[str, float]: + """Get current memory usage statistics in 
GB.""" + try: + current_memory = mx.get_active_memory() / 1024**3 + peak_memory = mx.get_peak_memory() / 1024**3 + except Exception: + current_memory = 0.0 + peak_memory = 0.0 - return response + return { + "current_gb": current_memory, + "peak_gb": peak_memory, + "model_gb": ( + current_memory - self._memory_baseline if self._memory_baseline else 0 + ), + } def interactive_chat( self, @@ -751,16 +288,7 @@ def interactive_chat( repetition_penalty: float = 1.1, use_chat_template: bool = True, ): - """Run an interactive chat session. - - Args: - system_prompt: Optional system prompt to prepend - max_tokens: Maximum tokens per response - temperature: Sampling temperature - top_p: Top-p sampling parameter - repetition_penalty: Penalty for repeated tokens - use_chat_template: Use tokenizer's chat template if available - """ + """Run an interactive chat session.""" print("Starting interactive chat. Type 'exit' or 'quit' to end.\n") conversation_history = [] @@ -769,7 +297,6 @@ def interactive_chat( while True: try: - # Get user input user_input = input("You: ").strip() if user_input.lower() in ["exit", "quit", "q"]: @@ -779,15 +306,9 @@ def interactive_chat( if not user_input: continue - # Add user message to history conversation_history.append({"role": "user", "content": user_input}) + prompt = self._format_conversation(conversation_history, use_chat_template=use_chat_template) - # Format conversation for the model using chat template if available - prompt = self._format_conversation( - conversation_history, use_chat_template=use_chat_template - ) - - # Generate response with streaming print("\nAssistant: ", end="", flush=True) response_tokens = [] @@ -797,21 +318,16 @@ def interactive_chat( temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, - use_chat_template=False, # Already applied in _format_conversation - use_chat_stop_tokens=True, # Enable chat stop tokens in interactive mode - interactive=True, # Enable full context length for 
interactive mode + use_chat_template=False, + use_chat_stop_tokens=True, + interactive=True, ): - # Stream all tokens directly (already formatted by generate_streaming) print(token, end="", flush=True) response_tokens.append(token) - # Add assistant response to history + print() assistant_response = "".join(response_tokens).strip() - conversation_history.append( - {"role": "assistant", "content": assistant_response} - ) - - print() # New line after response + conversation_history.append({"role": "assistant", "content": assistant_response}) except KeyboardInterrupt: print("\n\nChat interrupted. Goodbye!") @@ -820,214 +336,6 @@ def interactive_chat( print(f"\n[ERROR] {e}") continue - def _format_conversation( - self, messages: list, use_chat_template: bool = True - ) -> str: - """Format conversation history into a prompt. - - Uses the tokenizer's chat template if available, otherwise falls back - to the legacy Human:/Assistant: format for compatibility. - - Args: - messages: List of message dictionaries with 'role' and 'content' - use_chat_template: Whether to use chat template if available - - Returns: - Formatted conversation string - """ - # Try to use native chat template if available - if ( - use_chat_template - and hasattr(self.tokenizer, "chat_template") - and self.tokenizer.chat_template - ): - try: - # Apply the tokenizer's chat template - formatted_prompt = self.tokenizer.apply_chat_template( - messages, tokenize=False, add_generation_prompt=True - ) - return formatted_prompt - except Exception as e: - # If chat template fails, fall back to legacy format - if self.verbose: - print(f"[WARNING] Chat template failed, using legacy format: {e}") - - # Legacy format fallback for compatibility - return self._legacy_format_conversation(messages) - - def _legacy_format_conversation(self, messages: list) -> str: - """Legacy conversation formatting for backward compatibility. 
- - This format was used in earlier versions and remains as a fallback - for models without chat templates. - """ - formatted = [] - - for message in messages: - role = message["role"] - content = message["content"] - - if role == "system": - formatted.append(f"System: {content}") - elif role == "user": - formatted.append(f"Human: {content}") - elif role == "assistant": - formatted.append(f"Assistant: {content}") - - # Add prompt for next assistant response - formatted.append("Assistant:") - - return "\n\n".join(formatted) - - def get_memory_usage(self) -> Dict[str, float]: - """Get current memory usage statistics. - - Returns: - Dictionary with memory statistics in GB - """ - try: - current_memory = mx.get_active_memory() / 1024**3 - peak_memory = mx.get_peak_memory() / 1024**3 - except Exception: - # Return zeros if memory stats unavailable - current_memory = 0.0 - peak_memory = 0.0 - - return { - "current_gb": current_memory, - "peak_gb": peak_memory, - "model_gb": ( - current_memory - self._memory_baseline if self._memory_baseline else 0 - ), - } - - def _format_reasoning_response(self, response: str) -> str: - """Format response from reasoning models for better readability. - - For MXFP4 models that generate reasoning followed by final answer, - format it nicely for display. 
- """ - if not self._is_reasoning_model: - return response - - # Check if response contains reasoning markers - if self._reasoning_start in response and self._final_start in response: - # Extract reasoning and final parts - try: - # Split on the reasoning start - before_reasoning, after_start = response.split(self._reasoning_start, 1) - - # Find the reasoning content (until <|end|>) - if self._reasoning_end in after_start: - reasoning_content, after_reasoning = after_start.split( - self._reasoning_end, 1 - ) - - # Find the final answer - if self._final_start in after_reasoning: - # Extract everything after final marker - final_parts = after_reasoning.split(self._final_start, 1) - if len(final_parts) > 1: - # Remove the <|channel|>final<|message|> marker - final_answer = final_parts[1].replace( - "<|channel|>final<|message|>", "", 1 - ) - - # Format with clear markers for parsing but minimal visual impact - formatted = [] - formatted.append("\n**[Reasoning]**\n") - formatted.append(reasoning_content.strip()) - formatted.append("\n\n---\n\n**[Answer]**\n") - formatted.append(final_answer.strip()) - - return "\n".join(formatted) - except Exception: - # If parsing fails, return original - pass - - # Fallback: just clean up the control tokens - cleaned = response - for marker in [ - "<|channel|>analysis<|message|>", - "<|end|>", - "<|start|>assistant", - "<|channel|>final<|message|>", - "<|return|>", - ]: - cleaned = cleaned.replace(marker, "") - - return cleaned.strip() - - def _filter_end_tokens_from_response( - self, response: str, use_chat_stop_tokens: bool = False - ) -> str: - """Filter end tokens from a complete response (batch mode). - - This method applies the same filtering logic as the streaming mode - to ensure consistent behavior between streaming and non-streaming. 
- - Args: - response: The complete generated response - use_chat_stop_tokens: Whether to apply chat stop tokens - - Returns: - Response with end tokens filtered out - """ - # Apply native stop token filtering FIRST (highest priority) - native_stop_tokens = self._stop_tokens if self._stop_tokens else [] - for stop_token in native_stop_tokens: - if stop_token in response: - # Find the stop token position and return everything before it - stop_pos = response.find(stop_token) - filtered_response = response[:stop_pos].rstrip() - if self.verbose: - print( - f"[DEBUG] Filtered stop token '{stop_token}' at position {stop_pos}" - ) - return filtered_response - - # Only check chat stop tokens if no native stop token found (fallback) - if use_chat_stop_tokens and self._chat_stop_tokens: - for stop_token in self._chat_stop_tokens: - if stop_token in response: - # Find the stop token position and return everything before it - stop_pos = response.find(stop_token) - return response[:stop_pos] - - # No stop tokens found, return original response - return response - - -def get_gpu_status() -> Dict[str, float]: - """Independent GPU status check - usable from anywhere. - - Returns: - Dictionary with GPU memory statistics in GB - """ - return { - "active_memory_gb": mx.get_active_memory() / 1024**3, - "peak_memory_gb": mx.get_peak_memory() / 1024**3, - } - - -def check_memory_available(required_gb: float) -> bool: - """Pre-flight check before model loading. 
- - Args: - required_gb: Required memory in GB - - Returns: - True if memory is likely available (conservative estimate) - """ - current_memory = mx.get_active_memory() / 1024**3 - - # Conservative estimate: assume system has at least 8GB unified memory - # and we should leave some headroom (2GB) for system processes - estimated_total = 8.0 # This could be improved by detecting actual system memory - available = estimated_total - current_memory - 2.0 # 2GB headroom - - return available >= required_gb - def run_model_enhanced( model_path: str, @@ -1042,26 +350,9 @@ def run_model_enhanced( hide_reasoning: bool = False, verbose: bool = False, ) -> Optional[str]: - """Enhanced run function with direct MLX integration. - - Uses context manager pattern for automatic resource cleanup. - - Args: - model_path: Path to the MLX model - prompt: Input prompt (if None, enters interactive mode) - interactive: Force interactive mode - max_tokens: Maximum tokens to generate - temperature: Sampling temperature - top_p: Top-p sampling parameter - repetition_penalty: Penalty for repeated tokens - stream: Whether to stream output - - Returns: - Generated text (in non-interactive mode) - """ + """Enhanced run function with direct MLX integration.""" try: with MLXRunner(model_path, verbose=verbose) as runner: - # Interactive mode if interactive or prompt is None: runner.interactive_chat( max_tokens=max_tokens, @@ -1072,13 +363,11 @@ def run_model_enhanced( ) return None - # Single prompt mode if verbose: print(f"\nPrompt: {prompt}\n") print("Response: ", end="", flush=True) if stream: - # Streaming generation response_tokens = [] try: for token in runner.generate_streaming( @@ -1090,14 +379,12 @@ def run_model_enhanced( use_chat_template=use_chat_template, hide_reasoning=hide_reasoning, ): - # Stream all tokens directly (already formatted by generate_streaming) print(token, end="", flush=True) response_tokens.append(token) except KeyboardInterrupt: print("\n[INFO] Generation interrupted 
by user.") response = "".join(response_tokens) else: - # Batch generation try: response = runner.generate_batch( prompt=prompt, @@ -1112,17 +399,37 @@ def run_model_enhanced( response = "" print(response) - # Show memory usage if verbose - if verbose: - memory_stats = runner.get_memory_usage() - print( - f"\n\nMemory: {memory_stats['model_gb']:.1f}GB model, {memory_stats['current_gb']:.1f}GB total" - ) - return response - # Note: cleanup happens automatically due to context manager - except Exception as e: print(f"\n[ERROR] {e}") return None + + +def get_gpu_status() -> Dict[str, float]: + """Independent GPU status check.""" + try: + return { + "active_memory_gb": mx.get_active_memory() / 1024**3, + "peak_memory_gb": mx.get_peak_memory() / 1024**3, + } + except Exception: + return {"active_memory_gb": 0.0, "peak_memory_gb": 0.0} + + +def check_memory_available(required_gb: float) -> bool: + """Pre-flight check before model loading using actual system RAM.""" + try: + current_memory = mx.get_active_memory() / (1024**3) + try: + estimated_total_gb = psutil.virtual_memory().total / (1024**3) + except Exception: + estimated_total_gb = 16.0 # Conservative fallback + logger.warning("Could not detect total RAM via psutil, falling back to 16GB.") + + # Calculate available memory with some headroom (2.0 GB) + available = estimated_total_gb - current_memory - 2.0 + return available >= required_gb + except Exception as e: + logger.warning(f"Error checking memory availability: {e}") + return True # Proceed anyway if check fails diff --git a/server/main.py b/server/main.py index 7ca2602..13126fb 100644 --- a/server/main.py +++ b/server/main.py @@ -36,22 +36,7 @@ async def log_requests(request: Request, call_next): logger.info(f"<-- {request.method} {request.url.path} {response.status_code}") return response -def get_backend(): - """ - Dynamically choose which backend should be used depending on the OS - """ - if sys.platform == "darwin": - from .backend import mlx - 
logger.info("Using MLX backend (MacOs)") - return mlx - elif sys.platform.startswith("linux"): - from .backend import linux - logger.info(f"Using linux backend {sys.platform}") - return linux - else: - raise RuntimeError(f"Unsupported OS: {sys.platform}") - -runtime.backend = get_backend() +# runtime.backend = get_backend() def run(): uvicorn.run(app, host="127.0.0.1", port=PORT) diff --git a/server/pyproject.toml b/server/pyproject.toml index 8c38c79..27b8004 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -4,11 +4,18 @@ version = "0.1.0" description = "Local MLX inference server for the Tiles CLI" requires-python = "==3.13.*" dependencies = [ - "fastapi", - "uvicorn", - "mlx-lm", - "black", - "huggingface-hub>=0.34.0", + "fastapi==0.119.0", + "uvicorn==0.38.0", + "mlx-lm==0.28.3", + "mlx-vlm==0.3.9", + "outlines==1.2.9", + "outlines-core==0.1.26", + "psutil==7.2.1", + "sentencepiece==0.2.1", + "pillow==12.1.0", + "transformers==4.49.0", + "black==25.9.0", + "huggingface-hub==0.35.3", ] [build-system] diff --git a/server/runtime.py b/server/runtime.py index e1399f8..b6e7387 100644 --- a/server/runtime.py +++ b/server/runtime.py @@ -1 +1,19 @@ -backend = None +import sys +import logging + +logger = logging.getLogger("app") + +def get_backend(): + """ + Dynamically choose which backend should be used depending on the OS + """ + if sys.platform == "darwin": + from .backend import mlx_backend + return mlx_backend + elif sys.platform.startswith("linux"): + from .backend import linux + return linux + else: + raise RuntimeError(f"Unsupported OS: {sys.platform}") + +backend = get_backend() diff --git a/server/schemas.py b/server/schemas.py index 8632684..e856755 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -28,6 +28,7 @@ class ChatCompletionRequest(BaseModel): stream: Optional[bool] = False stop: Optional[Union[str, List[str]]] = None repetition_penalty: Optional[float] = 1.1 + response_format: Optional[Dict[str, Any]] = None class CompletionResponse(BaseModel): @@ -63,3 +64,23 @@ 
class StartRequest(BaseModel): class downloadRequest(BaseModel): model: str + + +class ResponseRequest(BaseModel): + model: str + messages: List[ChatMessage] + max_tokens: Optional[int] = None + temperature: Optional[float] = 0.7 + top_p: Optional[float] = 0.9 + stop: Optional[Union[str, List[str]]] = None + repetition_penalty: Optional[float] = 1.1 + response_format: Optional[Dict[str, Any]] = None + + +class ResponseResponse(BaseModel): + id: str + object: str = "chat.completion" + created: int + model: str + choices: List[Dict[str, Any]] + usage: Optional[Dict[str, int]] = None diff --git a/server/tests/test_inference.py b/server/tests/test_inference.py new file mode 100644 index 0000000..a6415ee --- /dev/null +++ b/server/tests/test_inference.py @@ -0,0 +1,67 @@ +import os +import sys +from pathlib import Path + +# Add the project root to sys.path +sys.path.append(str(Path(__file__).parent.parent.parent)) + +from server.backend.mlx_runner import MLXRunner +from server.cache_utils import get_model_path + +def test_inference(): + model_spec = "driaforall/mem-agent-mlx-4bit" + print(f"Testing inference with model: {model_spec}") + + try: + model_path, model_name, commit_hash = get_model_path(model_spec) + if model_path is None or not model_path.exists(): + print(f"Error: Model {model_spec} not found in cache. Please download it first.") + return + + runner = MLXRunner(str(model_path), verbose=True) + + with runner: + print("\n--- Testing Streaming Generation ---") + prompt = "Why is the sky blue? Answer in one sentence." 
+ full_response = "" + for token in runner.generate_streaming(prompt, max_tokens=50): + print(token, end="", flush=True) + full_response += token + print("\n--- Streaming Done ---\n") + + print("\n--- Memory Usage ---") + print(runner.get_memory_usage()) + + print("\n--- Testing JSON Structured Output ---") + json_schema = { + "type": "object", + "properties": { + "answer": {"type": "string"}, + "confidence": {"type": "number"} + }, + "required": ["answer", "confidence"] + } + import json + prompt = "What is the capital of France? Return in JSON format." + full_response = "" + for token in runner.generate_streaming(prompt, max_tokens=100, json_schema=json.dumps(json_schema)): + print(token, end="", flush=True) + full_response += token + print("\n--- JSON Done ---\n") + + try: + parsed = json.loads(full_response) + print(f"Parsed JSON: {parsed}") + assert "answer" in parsed + assert "confidence" in parsed + print("JSON Schema verification PASSED") + except Exception as e: + print(f"JSON Schema verification FAILED: {e}") + + except Exception as e: + print(f"Tests failed with error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_inference() diff --git a/server/tests/test_lora.py b/server/tests/test_lora.py new file mode 100644 index 0000000..03385cd --- /dev/null +++ b/server/tests/test_lora.py @@ -0,0 +1,46 @@ +import os +import sys +from pathlib import Path +import unittest +from unittest.mock import MagicMock, patch + +# Add the project root to sys.path +sys.path.append(str(Path(__file__).parent.parent.parent)) + +from server.backend.mlx_runner import MLXRunner + +class TestLoRA(unittest.TestCase): + @patch("server.backend.mlx_runner.engine_load_model") + def test_lora_path_passed(self, mock_load): + """Verify that adapter_path is actually passed to the engine loader.""" + model_path = "/tmp/fake_model" + adapter_path = "/tmp/fake_adapter" + + runner = MLXRunner(model_path, adapter_path=adapter_path) + runner.load_model() + + 
mock_load.assert_called_once_with( + model_path, adapter_path=adapter_path + ) + print("LoRA path propagation verification (MLXRunner -> engine): PASSED") + + @patch("mlx_lm.utils.load") + def test_model_kit_passes_adapter(self, mock_mlx_load): + """Verify that ModelKit passes the adapter path to mlx_lm.""" + from server.backend.mlx_engine.model_kit.model_kit import ModelKit + + # Fixed mock return value to avoid unpack error + mock_mlx_load.return_value = (MagicMock(), MagicMock()) + + # Mocking config.json + with patch("pathlib.Path.read_text", return_value='{"model_type": "llama"}'): + with patch("json.loads", return_value={"model_type": "llama"}): + model_kit = ModelKit(Path("/tmp/fake_model"), adapter_path="/tmp/fake_adapter") + + mock_mlx_load.assert_called_once() + _, kwargs = mock_mlx_load.call_args + self.assertEqual(kwargs.get("adapter_path"), Path("/tmp/fake_adapter")) + print("ModelKit LoRA path propagation (ModelKit -> mlx_lm): PASSED") + +if __name__ == "__main__": + unittest.main() diff --git a/server/tests/test_responses_api.py b/server/tests/test_responses_api.py new file mode 100644 index 0000000..845bb53 --- /dev/null +++ b/server/tests/test_responses_api.py @@ -0,0 +1,103 @@ +import sys +import os +from pathlib import Path + +# Add the project root to sys.path +sys.path.append(str(Path(__file__).parent.parent.parent)) + +from fastapi.testclient import TestClient +from server.api import app +from server.backend.mlx_backend import get_or_load_model +import json + +def test_responses_api(): + client = TestClient(app) + + # We use a model that is likely to be present in the cache + model_spec = "driaforall/mem-agent-mlx-4bit" + print(f"Testing Responses API with model: {model_spec}") + + # Pre-load the model to avoid timeout in test + try: + from server.cache_utils import get_model_path + model_path, _, _ = get_model_path(model_spec) + if not model_path or not model_path.exists(): + print(f"Skipping test, model {model_spec} not found in cache.") + 
return + + get_or_load_model(model_spec) + except Exception as e: + print(f"Skipping test, could not load model: {e}") + return + + payload = { + "model": model_spec, + "messages": [ + {"role": "user", "content": "What is the capital of France? Answer with just the name of the city."} + ], + "temperature": 0.0, + "max_tokens": 10 + } + + print("Sending request to /v1/responses...") + try: + response = client.post("/v1/responses", json=payload) + + print(f"Status Code: {response.status_code}") + if response.status_code == 200: + data = response.json() + print(f"Response: {json.dumps(data, indent=2)}") + assert data["object"] == "chat.completion" + assert "choices" in data + assert len(data["choices"]) > 0 + assert "message" in data["choices"][0] + assert "usage" in data + + content = data["choices"][0]["message"]["content"] + assert len(content) > 0 + print(f"Response content: {content}") + print("Chat completion test PASSED") + else: + print(f"Test FAILED: {response.text}") + return + + print("\n--- Testing JSON Structured Output ---") + json_schema = { + "type": "object", + "properties": { + "capital": {"type": "string"}, + "country": {"type": "string"} + }, + "required": ["capital", "country"] + } + + payload["messages"] = [{"role": "user", "content": "What is the capital of Germany? 
Respond in JSON."}] + payload["response_format"] = { + "type": "json_schema", + "json_schema": {"schema": json_schema} + } + payload["max_tokens"] = 100 + + print("Sending request to /v1/responses with JSON schema...") + response = client.post("/v1/responses", json=payload) + + print(f"Status Code: {response.status_code}") + if response.status_code == 200: + data = response.json() + print(f"Response: {json.dumps(data, indent=2)}") + content = data["choices"][0]["message"]["content"] + parsed = json.loads(content) + print(f"Parsed JSON: {parsed}") + assert parsed["capital"].lower() == "berlin" + assert parsed["country"].lower() == "germany" + print("JSON schema test PASSED") + else: + print(f"JSON schema test FAILED: {response.text}") + + except Exception as e: + print(f"Test FAILED with exception: {type(e).__name__}: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_responses_api() diff --git a/server/uv.lock b/server/uv.lock index 863c09c..8b0a9ae 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -2,6 +2,70 @@ version = 1 revision = 3 requires-python = "==3.13.*" +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, +] + +[[package]] +name = "aiohttp" +version = "3.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohappyeyeballs" }, + { 
name = "aiosignal" }, + { name = "attrs" }, + { name = "frozenlist" }, + { name = "multidict" }, + { name = "propcache" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/8a/12ca489246ca1faaf5432844adbfce7ff2cc4997733e0af120869345643a/aiohttp-3.13.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c", size = 734190, upload-time = "2026-01-03T17:30:45.832Z" }, + { url = "https://files.pythonhosted.org/packages/32/08/de43984c74ed1fca5c014808963cc83cb00d7bb06af228f132d33862ca76/aiohttp-3.13.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9", size = 491783, upload-time = "2026-01-03T17:30:47.466Z" }, + { url = "https://files.pythonhosted.org/packages/17/f8/8dd2cf6112a5a76f81f81a5130c57ca829d101ad583ce57f889179accdda/aiohttp-3.13.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3", size = 490704, upload-time = "2026-01-03T17:30:49.373Z" }, + { url = "https://files.pythonhosted.org/packages/6d/40/a46b03ca03936f832bc7eaa47cfbb1ad012ba1be4790122ee4f4f8cba074/aiohttp-3.13.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf", size = 1720652, upload-time = "2026-01-03T17:30:50.974Z" }, + { url = "https://files.pythonhosted.org/packages/f7/7e/917fe18e3607af92657e4285498f500dca797ff8c918bd7d90b05abf6c2a/aiohttp-3.13.3-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6", size = 1692014, upload-time = "2026-01-03T17:30:52.729Z" }, + { url = "https://files.pythonhosted.org/packages/71/b6/cefa4cbc00d315d68973b671cf105b21a609c12b82d52e5d0c9ae61d2a09/aiohttp-3.13.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d", size = 1759777, upload-time = "2026-01-03T17:30:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e3/e06ee07b45e59e6d81498b591fc589629be1553abb2a82ce33efe2a7b068/aiohttp-3.13.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261", size = 1861276, upload-time = "2026-01-03T17:30:56.512Z" }, + { url = "https://files.pythonhosted.org/packages/7c/24/75d274228acf35ceeb2850b8ce04de9dd7355ff7a0b49d607ee60c29c518/aiohttp-3.13.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0", size = 1743131, upload-time = "2026-01-03T17:30:58.256Z" }, + { url = "https://files.pythonhosted.org/packages/04/98/3d21dde21889b17ca2eea54fdcff21b27b93f45b7bb94ca029c31ab59dc3/aiohttp-3.13.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730", size = 1556863, upload-time = "2026-01-03T17:31:00.445Z" }, + { url = "https://files.pythonhosted.org/packages/9e/84/da0c3ab1192eaf64782b03971ab4055b475d0db07b17eff925e8c93b3aa5/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91", size = 1682793, upload-time = "2026-01-03T17:31:03.024Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/0f/5802ada182f575afa02cbd0ec5180d7e13a402afb7c2c03a9aa5e5d49060/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3", size = 1716676, upload-time = "2026-01-03T17:31:04.842Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8c/714d53bd8b5a4560667f7bbbb06b20c2382f9c7847d198370ec6526af39c/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4", size = 1733217, upload-time = "2026-01-03T17:31:06.868Z" }, + { url = "https://files.pythonhosted.org/packages/7d/79/e2176f46d2e963facea939f5be2d26368ce543622be6f00a12844d3c991f/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998", size = 1552303, upload-time = "2026-01-03T17:31:08.958Z" }, + { url = "https://files.pythonhosted.org/packages/ab/6a/28ed4dea1759916090587d1fe57087b03e6c784a642b85ef48217b0277ae/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0", size = 1763673, upload-time = "2026-01-03T17:31:10.676Z" }, + { url = "https://files.pythonhosted.org/packages/e8/35/4a3daeb8b9fab49240d21c04d50732313295e4bd813a465d840236dd0ce1/aiohttp-3.13.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591", size = 1721120, upload-time = "2026-01-03T17:31:12.575Z" }, + { url = "https://files.pythonhosted.org/packages/bc/9f/d643bb3c5fb99547323e635e251c609fbbc660d983144cfebec529e09264/aiohttp-3.13.3-cp313-cp313-win32.whl", hash = "sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf", size = 427383, upload-time = "2026-01-03T17:31:14.382Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/f1/ab0395f8a79933577cdd996dd2f9aa6014af9535f65dddcf88204682fe62/aiohttp-3.13.3-cp313-cp313-win_amd64.whl", hash = "sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e", size = 453899, upload-time = "2026-01-03T17:31:15.958Z" }, +] + +[[package]] +name = "aiosignal" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "frozenlist" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, +] + +[[package]] +name = "airportsdata" +version = "20250909" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/c6/17ae8a65f7fa5bbbeee166f8070063eb8b70c89501a65c2e6885db61fc08/airportsdata-20250909.tar.gz", hash = "sha256:f39974fe1101817ced4ccf7c6ed336408469e5e778395d0a3e7a5112ec298f90", size = 907204, upload-time = "2025-09-09T01:07:31.256Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/87/59b570b9c4b014532777dc3baffc9bea10cf0cc8b232cf3c17e4bd0754a6/airportsdata-20250909-py3-none-any.whl", hash = "sha256:ce7dc6e1485afe3915e708212c7024ad158470c1c934e6a6cb217cf28b798ac7", size = 914391, upload-time = "2025-09-09T01:07:29.364Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -24,6 +88,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, ] +[[package]] +name = "attrs" +version = "25.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, +] + [[package]] name = "black" version = "25.9.0" @@ -54,6 +127,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286, upload-time = "2025-10-05T04:12:14.03Z" }, ] +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", 
hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash 
= "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.4" @@ -91,6 +187,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295, upload-time = 
"2025-09-18T17:32:22.42Z" }, ] +[[package]] +name = "cloudpickle" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -100,6 +205,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "datasets" +version = "4.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dill" }, + { name = "filelock" }, + { name = "fsspec", extra = ["http"] }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/54/9359803da96bc65439a28fbb014dc2c90b7d4d8034a93b72362b0d40191f/datasets-4.4.2.tar.gz", hash = "sha256:9de16e415c4ba4713eac0493f7c7dc74f3aa21599297f00cc6ddab409cb7b24b", size = 586474, upload-time = "2025-12-19T15:03:09.129Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7b/b5/fefa518c809de7bced5cddb7c21c010da66fa2ae494bda96844a280cc6ce/datasets-4.4.2-py3-none-any.whl", hash = "sha256:6f5ef3417504d9cd663c71c1b90b9a494ff4c2076a2cd6a6e40ceee6ad95befc", size = 512268, upload-time = "2025-12-19T15:03:07.087Z" }, +] + +[[package]] +name = "dill" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/3d/9373ad9c56321fdab5b41197068e1d8c25883b3fea29dd361f9b55116869/dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049", size = 119668, upload-time = "2025-04-16T00:41:47.671Z" }, +] + +[[package]] +name = "diskcache" +version = "5.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" }, +] + [[package]] name = "fastapi" version = "0.119.0" @@ -123,6 +271,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 
16054, upload-time = "2025-10-08T18:03:48.35Z" }, ] +[[package]] +name = "frozenlist" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/40/0832c31a37d60f60ed79e9dfb5a92e1e2af4f40a16a29abcc7992af9edff/frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a", size = 85717, upload-time = "2025-10-06T05:36:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/30/ba/b0b3de23f40bc55a7057bd38434e25c34fa48e17f20ee273bbde5e0650f3/frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7", size = 49651, upload-time = "2025-10-06T05:36:28.855Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ab/6e5080ee374f875296c4243c381bbdef97a9ac39c6e3ce1d5f7d42cb78d6/frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40", size = 49417, upload-time = "2025-10-06T05:36:29.877Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027", size = 234391, upload-time = "2025-10-06T05:36:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/40/76/c202df58e3acdf12969a7895fd6f3bc016c642e6726aa63bd3025e0fc71c/frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822", size = 233048, upload-time = "2025-10-06T05:36:32.531Z" }, + { url = "https://files.pythonhosted.org/packages/f9/c0/8746afb90f17b73ca5979c7a3958116e105ff796e718575175319b5bb4ce/frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121", size = 226549, upload-time = "2025-10-06T05:36:33.706Z" }, + { url = "https://files.pythonhosted.org/packages/7e/eb/4c7eefc718ff72f9b6c4893291abaae5fbc0c82226a32dcd8ef4f7a5dbef/frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5", size = 239833, upload-time = "2025-10-06T05:36:34.947Z" }, + { url = "https://files.pythonhosted.org/packages/c2/4e/e5c02187cf704224f8b21bee886f3d713ca379535f16893233b9d672ea71/frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e", size = 245363, upload-time = "2025-10-06T05:36:36.534Z" }, + { url = "https://files.pythonhosted.org/packages/1f/96/cb85ec608464472e82ad37a17f844889c36100eed57bea094518bf270692/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11", size = 229314, upload-time = "2025-10-06T05:36:38.582Z" }, + { url = "https://files.pythonhosted.org/packages/5d/6f/4ae69c550e4cee66b57887daeebe006fe985917c01d0fff9caab9883f6d0/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1", size = 243365, upload-time = "2025-10-06T05:36:40.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/58/afd56de246cf11780a40a2c28dc7cbabbf06337cc8ddb1c780a2d97e88d8/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1", size = 237763, upload-time = "2025-10-06T05:36:41.355Z" }, + { url = "https://files.pythonhosted.org/packages/cb/36/cdfaf6ed42e2644740d4a10452d8e97fa1c062e2a8006e4b09f1b5fd7d63/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8", size = 240110, upload-time = "2025-10-06T05:36:42.716Z" }, + { url = "https://files.pythonhosted.org/packages/03/a8/9ea226fbefad669f11b52e864c55f0bd57d3c8d7eb07e9f2e9a0b39502e1/frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed", size = 233717, upload-time = "2025-10-06T05:36:44.251Z" }, + { url = "https://files.pythonhosted.org/packages/1e/0b/1b5531611e83ba7d13ccc9988967ea1b51186af64c42b7a7af465dcc9568/frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496", size = 39628, upload-time = "2025-10-06T05:36:45.423Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231", size = 43882, upload-time = "2025-10-06T05:36:46.796Z" }, + { url = "https://files.pythonhosted.org/packages/c1/17/502cd212cbfa96eb1388614fe39a3fc9ab87dbbe042b66f97acb57474834/frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62", size = 39676, upload-time = "2025-10-06T05:36:47.8Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/5c/3bbfaa920dfab09e76946a5d2833a7cbdf7b9b4a91c714666ac4855b88b4/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94", size = 89235, upload-time = "2025-10-06T05:36:48.78Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d6/f03961ef72166cec1687e84e8925838442b615bd0b8854b54923ce5b7b8a/frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c", size = 50742, upload-time = "2025-10-06T05:36:49.837Z" }, + { url = "https://files.pythonhosted.org/packages/1e/bb/a6d12b7ba4c3337667d0e421f7181c82dda448ce4e7ad7ecd249a16fa806/frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52", size = 51725, upload-time = "2025-10-06T05:36:50.851Z" }, + { url = "https://files.pythonhosted.org/packages/bc/71/d1fed0ffe2c2ccd70b43714c6cab0f4188f09f8a67a7914a6b46ee30f274/frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51", size = 284533, upload-time = "2025-10-06T05:36:51.898Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/fb1685a7b009d89f9bf78a42d94461bc06581f6e718c39344754a5d9bada/frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65", size = 292506, upload-time = "2025-10-06T05:36:53.101Z" }, + { url = "https://files.pythonhosted.org/packages/e6/3b/b991fe1612703f7e0d05c0cf734c1b77aaf7c7d321df4572e8d36e7048c8/frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82", size = 274161, upload-time = 
"2025-10-06T05:36:54.309Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ec/c5c618767bcdf66e88945ec0157d7f6c4a1322f1473392319b7a2501ded7/frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714", size = 294676, upload-time = "2025-10-06T05:36:55.566Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ce/3934758637d8f8a88d11f0585d6495ef54b2044ed6ec84492a91fa3b27aa/frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d", size = 300638, upload-time = "2025-10-06T05:36:56.758Z" }, + { url = "https://files.pythonhosted.org/packages/fc/4f/a7e4d0d467298f42de4b41cbc7ddaf19d3cfeabaf9ff97c20c6c7ee409f9/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506", size = 283067, upload-time = "2025-10-06T05:36:57.965Z" }, + { url = "https://files.pythonhosted.org/packages/dc/48/c7b163063d55a83772b268e6d1affb960771b0e203b632cfe09522d67ea5/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51", size = 292101, upload-time = "2025-10-06T05:36:59.237Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d0/2366d3c4ecdc2fd391e0afa6e11500bfba0ea772764d631bbf82f0136c9d/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e", size = 289901, upload-time = "2025-10-06T05:37:00.811Z" }, + { url = "https://files.pythonhosted.org/packages/b8/94/daff920e82c1b70e3618a2ac39fbc01ae3e2ff6124e80739ce5d71c9b920/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0", size = 289395, upload-time = 
"2025-10-06T05:37:02.115Z" }, + { url = "https://files.pythonhosted.org/packages/e3/20/bba307ab4235a09fdcd3cc5508dbabd17c4634a1af4b96e0f69bfe551ebd/frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41", size = 283659, upload-time = "2025-10-06T05:37:03.711Z" }, + { url = "https://files.pythonhosted.org/packages/fd/00/04ca1c3a7a124b6de4f8a9a17cc2fcad138b4608e7a3fc5877804b8715d7/frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b", size = 43492, upload-time = "2025-10-06T05:37:04.915Z" }, + { url = "https://files.pythonhosted.org/packages/59/5e/c69f733a86a94ab10f68e496dc6b7e8bc078ebb415281d5698313e3af3a1/frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888", size = 48034, upload-time = "2025-10-06T05:37:06.343Z" }, + { url = "https://files.pythonhosted.org/packages/16/6c/be9d79775d8abe79b05fa6d23da99ad6e7763a1d080fbae7290b286093fd/frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042", size = 41749, upload-time = "2025-10-06T05:37:07.431Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, +] + [[package]] name = "fsspec" version = "2025.9.0" @@ -132,6 +321,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, ] +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] + 
+[[package]] +name = "genson" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/cf/2303c8ad276dcf5ee2ad6cf69c4338fd86ef0f471a5207b069adf7a393cf/genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37", size = 34919, upload-time = "2024-05-15T22:08:49.123Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/5c/e226de133afd8bb267ec27eead9ae3d784b95b39a287ed404caab39a5f50/genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7", size = 21470, upload-time = "2024-05-15T22:08:47.056Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -156,6 +359,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/0e/471f0a21db36e71a2f1752767ad77e92d8cde24e974e03d662931b1305ec/hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045", size = 2804691, upload-time = "2025-09-12T20:10:28.433Z" }, ] +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + 
{ name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + [[package]] name = "huggingface-hub" version = "0.35.3" @@ -184,6 +415,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "interegular" +version = "0.3.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/9d/8b6dde58a028a3962ce17e84d5fe73758df61378e00ef8ac3d85da34b0ff/interegular-0.3.3.tar.gz", hash = "sha256:d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600", size = 24705, upload-time = "2024-01-06T23:01:22.372Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/01/72d6472f80651673716d1deda2a5bbb633e563ecf94f4479da5519d69d25/interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c", size = 23635, upload-time = "2024-01-06T23:01:20.829Z" }, +] + +[[package]] +name = "iso3166" +version = "2.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/11/b5023c736a185a88ebd0d38646af6f4d1b4c9b91f2ca84e08e5d2bc7ac3c/iso3166-2.1.1.tar.gz", hash = 
"sha256:fcd551b8dda66b44e9f9e6d6bbbee3a1145a22447c0a556e5d0fb1ad1e491719", size = 12807, upload-time = "2022-07-12T04:07:57.294Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/d0/bf18725b8d47f37858ff801f8e4d40c6982730a899725bdb6ded62199954/iso3166-2.1.1-py3-none-any.whl", hash = "sha256:263660b36f8471c42acd1ff673d28a3715edbce7d24b1550d0cf010f6816c47f", size = 9829, upload-time = "2022-07-12T04:07:55.54Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -196,6 +445,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jsonpath-ng" +version = "1.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ply" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/86/08646239a313f895186ff0a4573452038eed8c86f54380b3ebac34d32fb2/jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c", size = 37838, upload-time = "2024-10-11T15:41:42.404Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/5a/73ecb3d82f8615f32ccdadeb9356726d6cae3a4bbc840b437ceb95708063/jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6", size = 30105, upload-time = "2024-11-20T17:58:30.418Z" }, +] + +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = 
"sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + +[[package]] +name = "lark" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ 
-237,6 +534,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/a2/078152b45aa8a23949a1b09601d0044f8bb4ab85e909e4475a440c21aaea/mlx-0.29.3-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:d59eccf6a1e1e131becc5a3910504507862da3a4e9b7bd9e73a625515d767844", size = 549585, upload-time = "2025-10-17T19:17:01.872Z" }, { url = "https://files.pythonhosted.org/packages/ae/bb/869eaac4efaae033c13db5fddd6a8907b5d667d135a35a2e482b1af402ee/mlx-0.29.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:6642aa0a6dc2242c024fb8274d00631a7e7ffbdcef26148afd299b877c1e6a4a", size = 549586, upload-time = "2025-10-17T19:16:57.844Z" }, { url = "https://files.pythonhosted.org/packages/ad/76/196c248c2b2a471f795356564ad1d7dc40284160c8b66370ffadfd991fa1/mlx-0.29.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:ec0aef311fab10cb5f2c274afa6edf6c482636096a5f7886aba43676454aa462", size = 549586, upload-time = "2025-10-17T19:16:39.912Z" }, + { url = "https://files.pythonhosted.org/packages/f2/90/d481dd70b351e28718cfc9a0deb229a75e140abda3ed59284cf635f93f12/mlx-0.29.3-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:e217a99ece66832a2e631131df32e9feb047276b68ac59ca0ad63735842f6dd0", size = 649781, upload-time = "2025-10-17T19:21:26.075Z" }, ] [[package]] @@ -266,6 +564,100 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/9a/39b7ecdf21cf2a39ced8d7933eed65c6cb38295cadfd0907dd1abd4d1ded/mlx_metal-0.29.3-py3-none-macosx_15_0_arm64.whl", hash = "sha256:106616f7f825851043c53d3dc186965c003985da9cbb6e5c034f35108fc1fc27", size = 36549163, upload-time = "2025-10-17T19:18:37.701Z" }, ] +[[package]] +name = "mlx-vlm" +version = "0.3.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "datasets" }, + { name = "fastapi" }, + { name = "mlx" }, + { name = "mlx-lm" }, + { name = "numpy" }, + { name = "opencv-python" }, + { name = "pillow" }, + { name = "requests" }, + { name = "soundfile" }, + { name = "tqdm" }, + { name = "transformers" }, + { 
name = "uvicorn" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/98/6b3c2d1317a317d0df544fe9ab0ef4f233ea85c1e4ac2fe6af7289ea1ee5/mlx_vlm-0.3.9.tar.gz", hash = "sha256:ae5050d0b1a051a29099c3a65efdbf6874bb497e8465734ac1992b6b179135b4", size = 303350, upload-time = "2025-12-03T21:48:24.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/76/d13985f2c42919d23d71549c92063ca749bfa6eea706fb08c14b6b5a0053/mlx_vlm-0.3.9-py3-none-any.whl", hash = "sha256:fa94a450161ae3978ca71565b5364c4ce0e86f0c1fae98a24afaa43feb121c57", size = 398621, upload-time = "2025-12-03T21:48:22.691Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + +[[package]] +name = "multidict" +version = "6.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/80/1e/5492c365f222f907de1039b91f922b93fa4f764c713ee858d235495d8f50/multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5", size = 101834, upload-time = "2025-10-06T14:52:30.657Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/86/33272a544eeb36d66e4d9a920602d1a2f57d4ebea4ef3cdfe5a912574c95/multidict-6.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bee7c0588aa0076ce77c0ea5d19a68d76ad81fcd9fe8501003b9a24f9d4000f6", size = 76135, 
upload-time = "2025-10-06T14:49:54.26Z" }, + { url = "https://files.pythonhosted.org/packages/91/1c/eb97db117a1ebe46d457a3d235a7b9d2e6dcab174f42d1b67663dd9e5371/multidict-6.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7ef6b61cad77091056ce0e7ce69814ef72afacb150b7ac6a3e9470def2198159", size = 45117, upload-time = "2025-10-06T14:49:55.82Z" }, + { url = "https://files.pythonhosted.org/packages/f1/d8/6c3442322e41fb1dd4de8bd67bfd11cd72352ac131f6368315617de752f1/multidict-6.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c0359b1ec12b1d6849c59f9d319610b7f20ef990a6d454ab151aa0e3b9f78ca", size = 43472, upload-time = "2025-10-06T14:49:57.048Z" }, + { url = "https://files.pythonhosted.org/packages/75/3f/e2639e80325af0b6c6febdf8e57cc07043ff15f57fa1ef808f4ccb5ac4cd/multidict-6.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cd240939f71c64bd658f186330603aac1a9a81bf6273f523fca63673cb7378a8", size = 249342, upload-time = "2025-10-06T14:49:58.368Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cc/84e0585f805cbeaa9cbdaa95f9a3d6aed745b9d25700623ac89a6ecff400/multidict-6.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60a4d75718a5efa473ebd5ab685786ba0c67b8381f781d1be14da49f1a2dc60", size = 257082, upload-time = "2025-10-06T14:49:59.89Z" }, + { url = "https://files.pythonhosted.org/packages/b0/9c/ac851c107c92289acbbf5cfb485694084690c1b17e555f44952c26ddc5bd/multidict-6.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53a42d364f323275126aff81fb67c5ca1b7a04fda0546245730a55c8c5f24bc4", size = 240704, upload-time = "2025-10-06T14:50:01.485Z" }, + { url = "https://files.pythonhosted.org/packages/50/cc/5f93e99427248c09da95b62d64b25748a5f5c98c7c2ab09825a1d6af0e15/multidict-6.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:3b29b980d0ddbecb736735ee5bef69bb2ddca56eff603c86f3f29a1128299b4f", size = 266355, upload-time = "2025-10-06T14:50:02.955Z" }, + { url = "https://files.pythonhosted.org/packages/ec/0c/2ec1d883ceb79c6f7f6d7ad90c919c898f5d1c6ea96d322751420211e072/multidict-6.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8a93b1c0ed2d04b97a5e9336fd2d33371b9a6e29ab7dd6503d63407c20ffbaf", size = 267259, upload-time = "2025-10-06T14:50:04.446Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2d/f0b184fa88d6630aa267680bdb8623fb69cb0d024b8c6f0d23f9a0f406d3/multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ff96e8815eecacc6645da76c413eb3b3d34cfca256c70b16b286a687d013c32", size = 254903, upload-time = "2025-10-06T14:50:05.98Z" }, + { url = "https://files.pythonhosted.org/packages/06/c9/11ea263ad0df7dfabcad404feb3c0dd40b131bc7f232d5537f2fb1356951/multidict-6.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7516c579652f6a6be0e266aec0acd0db80829ca305c3d771ed898538804c2036", size = 252365, upload-time = "2025-10-06T14:50:07.511Z" }, + { url = "https://files.pythonhosted.org/packages/41/88/d714b86ee2c17d6e09850c70c9d310abac3d808ab49dfa16b43aba9d53fd/multidict-6.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:040f393368e63fb0f3330e70c26bfd336656bed925e5cbe17c9da839a6ab13ec", size = 250062, upload-time = "2025-10-06T14:50:09.074Z" }, + { url = "https://files.pythonhosted.org/packages/15/fe/ad407bb9e818c2b31383f6131ca19ea7e35ce93cf1310fce69f12e89de75/multidict-6.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b3bc26a951007b1057a1c543af845f1c7e3e71cc240ed1ace7bf4484aa99196e", size = 249683, upload-time = "2025-10-06T14:50:10.714Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a4/a89abdb0229e533fb925e7c6e5c40201c2873efebc9abaf14046a4536ee6/multidict-6.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = 
"sha256:7b022717c748dd1992a83e219587aabe45980d88969f01b316e78683e6285f64", size = 261254, upload-time = "2025-10-06T14:50:12.28Z" }, + { url = "https://files.pythonhosted.org/packages/8d/aa/0e2b27bd88b40a4fb8dc53dd74eecac70edaa4c1dd0707eb2164da3675b3/multidict-6.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9600082733859f00d79dee64effc7aef1beb26adb297416a4ad2116fd61374bd", size = 257967, upload-time = "2025-10-06T14:50:14.16Z" }, + { url = "https://files.pythonhosted.org/packages/d0/8e/0c67b7120d5d5f6d874ed85a085f9dc770a7f9d8813e80f44a9fec820bb7/multidict-6.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94218fcec4d72bc61df51c198d098ce2b378e0ccbac41ddbed5ef44092913288", size = 250085, upload-time = "2025-10-06T14:50:15.639Z" }, + { url = "https://files.pythonhosted.org/packages/ba/55/b73e1d624ea4b8fd4dd07a3bb70f6e4c7c6c5d9d640a41c6ffe5cdbd2a55/multidict-6.7.0-cp313-cp313-win32.whl", hash = "sha256:a37bd74c3fa9d00be2d7b8eca074dc56bd8077ddd2917a839bd989612671ed17", size = 41713, upload-time = "2025-10-06T14:50:17.066Z" }, + { url = "https://files.pythonhosted.org/packages/32/31/75c59e7d3b4205075b4c183fa4ca398a2daf2303ddf616b04ae6ef55cffe/multidict-6.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:30d193c6cc6d559db42b6bcec8a5d395d34d60c9877a0b71ecd7c204fcf15390", size = 45915, upload-time = "2025-10-06T14:50:18.264Z" }, + { url = "https://files.pythonhosted.org/packages/31/2a/8987831e811f1184c22bc2e45844934385363ee61c0a2dcfa8f71b87e608/multidict-6.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:ea3334cabe4d41b7ccd01e4d349828678794edbc2d3ae97fc162a3312095092e", size = 43077, upload-time = "2025-10-06T14:50:19.853Z" }, + { url = "https://files.pythonhosted.org/packages/e8/68/7b3a5170a382a340147337b300b9eb25a9ddb573bcdfff19c0fa3f31ffba/multidict-6.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ad9ce259f50abd98a1ca0aa6e490b58c316a0fce0617f609723e40804add2c00", size = 83114, upload-time = "2025-10-06T14:50:21.223Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/5c/3fa2d07c84df4e302060f555bbf539310980362236ad49f50eeb0a1c1eb9/multidict-6.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07f5594ac6d084cbb5de2df218d78baf55ef150b91f0ff8a21cc7a2e3a5a58eb", size = 48442, upload-time = "2025-10-06T14:50:22.871Z" }, + { url = "https://files.pythonhosted.org/packages/fc/56/67212d33239797f9bd91962bb899d72bb0f4c35a8652dcdb8ed049bef878/multidict-6.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0591b48acf279821a579282444814a2d8d0af624ae0bc600aa4d1b920b6e924b", size = 46885, upload-time = "2025-10-06T14:50:24.258Z" }, + { url = "https://files.pythonhosted.org/packages/46/d1/908f896224290350721597a61a69cd19b89ad8ee0ae1f38b3f5cd12ea2ac/multidict-6.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:749a72584761531d2b9467cfbdfd29487ee21124c304c4b6cb760d8777b27f9c", size = 242588, upload-time = "2025-10-06T14:50:25.716Z" }, + { url = "https://files.pythonhosted.org/packages/ab/67/8604288bbd68680eee0ab568fdcb56171d8b23a01bcd5cb0c8fedf6e5d99/multidict-6.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b4c3d199f953acd5b446bf7c0de1fe25d94e09e79086f8dc2f48a11a129cdf1", size = 249966, upload-time = "2025-10-06T14:50:28.192Z" }, + { url = "https://files.pythonhosted.org/packages/20/33/9228d76339f1ba51e3efef7da3ebd91964d3006217aae13211653193c3ff/multidict-6.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9fb0211dfc3b51efea2f349ec92c114d7754dd62c01f81c3e32b765b70c45c9b", size = 228618, upload-time = "2025-10-06T14:50:29.82Z" }, + { url = "https://files.pythonhosted.org/packages/f8/2d/25d9b566d10cab1c42b3b9e5b11ef79c9111eaf4463b8c257a3bd89e0ead/multidict-6.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a027ec240fe73a8d6281872690b988eed307cd7d91b23998ff35ff577ca688b5", size = 
257539, upload-time = "2025-10-06T14:50:31.731Z" }, + { url = "https://files.pythonhosted.org/packages/b6/b1/8d1a965e6637fc33de3c0d8f414485c2b7e4af00f42cab3d84e7b955c222/multidict-6.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1d964afecdf3a8288789df2f5751dc0a8261138c3768d9af117ed384e538fad", size = 256345, upload-time = "2025-10-06T14:50:33.26Z" }, + { url = "https://files.pythonhosted.org/packages/ba/0c/06b5a8adbdeedada6f4fb8d8f193d44a347223b11939b42953eeb6530b6b/multidict-6.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf53b15b1b7df9fbd0709aa01409000a2b4dd03a5f6f5cc548183c7c8f8b63c", size = 247934, upload-time = "2025-10-06T14:50:34.808Z" }, + { url = "https://files.pythonhosted.org/packages/8f/31/b2491b5fe167ca044c6eb4b8f2c9f3b8a00b24c432c365358eadac5d7625/multidict-6.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:654030da3197d927f05a536a66186070e98765aa5142794c9904555d3a9d8fb5", size = 245243, upload-time = "2025-10-06T14:50:36.436Z" }, + { url = "https://files.pythonhosted.org/packages/61/1a/982913957cb90406c8c94f53001abd9eafc271cb3e70ff6371590bec478e/multidict-6.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2090d3718829d1e484706a2f525e50c892237b2bf9b17a79b059cb98cddc2f10", size = 235878, upload-time = "2025-10-06T14:50:37.953Z" }, + { url = "https://files.pythonhosted.org/packages/be/c0/21435d804c1a1cf7a2608593f4d19bca5bcbd7a81a70b253fdd1c12af9c0/multidict-6.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d2cfeec3f6f45651b3d408c4acec0ebf3daa9bc8a112a084206f5db5d05b754", size = 243452, upload-time = "2025-10-06T14:50:39.574Z" }, + { url = "https://files.pythonhosted.org/packages/54/0a/4349d540d4a883863191be6eb9a928846d4ec0ea007d3dcd36323bb058ac/multidict-6.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4ef089f985b8c194d341eb2c24ae6e7408c9a0e2e5658699c92f497437d88c3c", size = 252312, upload-time = 
"2025-10-06T14:50:41.612Z" }, + { url = "https://files.pythonhosted.org/packages/26/64/d5416038dbda1488daf16b676e4dbfd9674dde10a0cc8f4fc2b502d8125d/multidict-6.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e93a0617cd16998784bf4414c7e40f17a35d2350e5c6f0bd900d3a8e02bd3762", size = 246935, upload-time = "2025-10-06T14:50:43.972Z" }, + { url = "https://files.pythonhosted.org/packages/9f/8c/8290c50d14e49f35e0bd4abc25e1bc7711149ca9588ab7d04f886cdf03d9/multidict-6.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0feece2ef8ebc42ed9e2e8c78fc4aa3cf455733b507c09ef7406364c94376c6", size = 243385, upload-time = "2025-10-06T14:50:45.648Z" }, + { url = "https://files.pythonhosted.org/packages/ef/a0/f83ae75e42d694b3fbad3e047670e511c138be747bc713cf1b10d5096416/multidict-6.7.0-cp313-cp313t-win32.whl", hash = "sha256:19a1d55338ec1be74ef62440ca9e04a2f001a04d0cc49a4983dc320ff0f3212d", size = 47777, upload-time = "2025-10-06T14:50:47.154Z" }, + { url = "https://files.pythonhosted.org/packages/dc/80/9b174a92814a3830b7357307a792300f42c9e94664b01dee8e457551fa66/multidict-6.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3da4fb467498df97e986af166b12d01f05d2e04f978a9c1c680ea1988e0bc4b6", size = 53104, upload-time = "2025-10-06T14:50:48.851Z" }, + { url = "https://files.pythonhosted.org/packages/cc/28/04baeaf0428d95bb7a7bea0e691ba2f31394338ba424fb0679a9ed0f4c09/multidict-6.7.0-cp313-cp313t-win_arm64.whl", hash = "sha256:b4121773c49a0776461f4a904cdf6264c88e42218aaa8407e803ca8025872792", size = 45503, upload-time = "2025-10-06T14:50:50.16Z" }, + { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" }, +] + +[[package]] +name = "multiprocess" +version = "0.70.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"dill" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/d8/0cba6cf51a1a31f20471fbc823a716170c73012ddc4fb85d706630ed6e8f/multiprocess-0.70.18-py310-none-any.whl", hash = "sha256:60c194974c31784019c1f459d984e8f33ee48f10fcf42c309ba97b30d9bd53ea", size = 134948, upload-time = "2025-04-17T03:11:20.223Z" }, + { url = "https://files.pythonhosted.org/packages/4b/88/9039f2fed1012ef584751d4ceff9ab4a51e5ae264898f0b7cbf44340a859/multiprocess-0.70.18-py311-none-any.whl", hash = "sha256:5aa6eef98e691281b3ad923be2832bf1c55dd2c859acd73e5ec53a66aae06a1d", size = 144462, upload-time = "2025-04-17T03:11:21.657Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b6/5f922792be93b82ec6b5f270bbb1ef031fd0622847070bbcf9da816502cc/multiprocess-0.70.18-py312-none-any.whl", hash = "sha256:9b78f8e5024b573730bfb654783a13800c2c0f2dfc0c25e70b40d184d64adaa2", size = 150287, upload-time = "2025-04-17T03:11:22.69Z" }, + { url = "https://files.pythonhosted.org/packages/ee/25/7d7e78e750bc1aecfaf0efbf826c69a791d2eeaf29cf20cba93ff4cced78/multiprocess-0.70.18-py313-none-any.whl", hash = "sha256:871743755f43ef57d7910a38433cfe41319e72be1bbd90b79c7a5ac523eb9334", size = 151917, upload-time = "2025-04-17T03:11:24.044Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c3/ca84c19bd14cdfc21c388fdcebf08b86a7a470ebc9f5c3c084fc2dbc50f7/multiprocess-0.70.18-py38-none-any.whl", hash = "sha256:dbf705e52a154fe5e90fb17b38f02556169557c2dd8bb084f2e06c2784d8279b", size = 132636, upload-time = "2025-04-17T03:11:24.936Z" }, + { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = 
"sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, +] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -275,36 +667,243 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "nest-asyncio" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, +] + +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + [[package]] name = "numpy" -version = "2.3.4" -source = { 
registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/f4/098d2270d52b41f1bd7db9fc288aaa0400cb48c2a3e2af6fa365d9720947/numpy-2.3.4.tar.gz", hash = "sha256:a7d018bfedb375a8d979ac758b120ba846a7fe764911a64465fd87b8729f4a6a", size = 20582187, upload-time = "2025-10-15T16:18:11.77Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/57/7e/b72610cc91edf138bc588df5150957a4937221ca6058b825b4725c27be62/numpy-2.3.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c090d4860032b857d94144d1a9976b8e36709e40386db289aaf6672de2a81966", size = 20950335, upload-time = "2025-10-15T16:16:10.304Z" }, - { url = "https://files.pythonhosted.org/packages/3e/46/bdd3370dcea2f95ef14af79dbf81e6927102ddf1cc54adc0024d61252fd9/numpy-2.3.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a13fc473b6db0be619e45f11f9e81260f7302f8d180c49a22b6e6120022596b3", size = 14179878, upload-time = "2025-10-15T16:16:12.595Z" }, - { url = "https://files.pythonhosted.org/packages/ac/01/5a67cb785bda60f45415d09c2bc245433f1c68dd82eef9c9002c508b5a65/numpy-2.3.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3634093d0b428e6c32c3a69b78e554f0cd20ee420dcad5a9f3b2a63762ce4197", size = 5108673, upload-time = "2025-10-15T16:16:14.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cd/8428e23a9fcebd33988f4cb61208fda832800ca03781f471f3727a820704/numpy-2.3.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:043885b4f7e6e232d7df4f51ffdef8c36320ee9d5f227b380ea636722c7ed12e", size = 6641438, upload-time = "2025-10-15T16:16:16.805Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d1/913fe563820f3c6b079f992458f7331278dcd7ba8427e8e745af37ddb44f/numpy-2.3.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ee6a571d1e4f0ea6d5f22d6e5fbd6ed1dc2b18542848e1e7301bd190500c9d7", size = 14281290, upload-time = "2025-10-15T16:16:18.764Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/7e/7d306ff7cb143e6d975cfa7eb98a93e73495c4deabb7d1b5ecf09ea0fd69/numpy-2.3.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fc8a63918b04b8571789688b2780ab2b4a33ab44bfe8ccea36d3eba51228c953", size = 16636543, upload-time = "2025-10-15T16:16:21.072Z" }, - { url = "https://files.pythonhosted.org/packages/47/6a/8cfc486237e56ccfb0db234945552a557ca266f022d281a2f577b98e955c/numpy-2.3.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:40cc556d5abbc54aabe2b1ae287042d7bdb80c08edede19f0c0afb36ae586f37", size = 16056117, upload-time = "2025-10-15T16:16:23.369Z" }, - { url = "https://files.pythonhosted.org/packages/b1/0e/42cb5e69ea901e06ce24bfcc4b5664a56f950a70efdcf221f30d9615f3f3/numpy-2.3.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ecb63014bb7f4ce653f8be7f1df8cbc6093a5a2811211770f6606cc92b5a78fd", size = 18577788, upload-time = "2025-10-15T16:16:27.496Z" }, - { url = "https://files.pythonhosted.org/packages/86/92/41c3d5157d3177559ef0a35da50f0cda7fa071f4ba2306dd36818591a5bc/numpy-2.3.4-cp313-cp313-win32.whl", hash = "sha256:e8370eb6925bb8c1c4264fec52b0384b44f675f191df91cbe0140ec9f0955646", size = 6282620, upload-time = "2025-10-15T16:16:29.811Z" }, - { url = "https://files.pythonhosted.org/packages/09/97/fd421e8bc50766665ad35536c2bb4ef916533ba1fdd053a62d96cc7c8b95/numpy-2.3.4-cp313-cp313-win_amd64.whl", hash = "sha256:56209416e81a7893036eea03abcb91c130643eb14233b2515c90dcac963fe99d", size = 12784672, upload-time = "2025-10-15T16:16:31.589Z" }, - { url = "https://files.pythonhosted.org/packages/ad/df/5474fb2f74970ca8eb978093969b125a84cc3d30e47f82191f981f13a8a0/numpy-2.3.4-cp313-cp313-win_arm64.whl", hash = "sha256:a700a4031bc0fd6936e78a752eefb79092cecad2599ea9c8039c548bc097f9bc", size = 10196702, upload-time = "2025-10-15T16:16:33.902Z" }, - { url = 
"https://files.pythonhosted.org/packages/11/83/66ac031464ec1767ea3ed48ce40f615eb441072945e98693bec0bcd056cc/numpy-2.3.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:86966db35c4040fdca64f0816a1c1dd8dbd027d90fca5a57e00e1ca4cd41b879", size = 21049003, upload-time = "2025-10-15T16:16:36.101Z" }, - { url = "https://files.pythonhosted.org/packages/5f/99/5b14e0e686e61371659a1d5bebd04596b1d72227ce36eed121bb0aeab798/numpy-2.3.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:838f045478638b26c375ee96ea89464d38428c69170360b23a1a50fa4baa3562", size = 14302980, upload-time = "2025-10-15T16:16:39.124Z" }, - { url = "https://files.pythonhosted.org/packages/2c/44/e9486649cd087d9fc6920e3fc3ac2aba10838d10804b1e179fb7cbc4e634/numpy-2.3.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d7315ed1dab0286adca467377c8381cd748f3dc92235f22a7dfc42745644a96a", size = 5231472, upload-time = "2025-10-15T16:16:41.168Z" }, - { url = "https://files.pythonhosted.org/packages/3e/51/902b24fa8887e5fe2063fd61b1895a476d0bbf46811ab0c7fdf4bd127345/numpy-2.3.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:84f01a4d18b2cc4ade1814a08e5f3c907b079c847051d720fad15ce37aa930b6", size = 6739342, upload-time = "2025-10-15T16:16:43.777Z" }, - { url = "https://files.pythonhosted.org/packages/34/f1/4de9586d05b1962acdcdb1dc4af6646361a643f8c864cef7c852bf509740/numpy-2.3.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:817e719a868f0dacde4abdfc5c1910b301877970195db9ab6a5e2c4bd5b121f7", size = 14354338, upload-time = "2025-10-15T16:16:46.081Z" }, - { url = "https://files.pythonhosted.org/packages/1f/06/1c16103b425de7969d5a76bdf5ada0804b476fed05d5f9e17b777f1cbefd/numpy-2.3.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85e071da78d92a214212cacea81c6da557cab307f2c34b5f85b628e94803f9c0", size = 16702392, upload-time = "2025-10-15T16:16:48.455Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/b2/65f4dc1b89b5322093572b6e55161bb42e3e0487067af73627f795cc9d47/numpy-2.3.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2ec646892819370cf3558f518797f16597b4e4669894a2ba712caccc9da53f1f", size = 16134998, upload-time = "2025-10-15T16:16:51.114Z" }, - { url = "https://files.pythonhosted.org/packages/d4/11/94ec578896cdb973aaf56425d6c7f2aff4186a5c00fac15ff2ec46998b46/numpy-2.3.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:035796aaaddfe2f9664b9a9372f089cfc88bd795a67bd1bfe15e6e770934cf64", size = 18651574, upload-time = "2025-10-15T16:16:53.429Z" }, - { url = "https://files.pythonhosted.org/packages/62/b7/7efa763ab33dbccf56dade36938a77345ce8e8192d6b39e470ca25ff3cd0/numpy-2.3.4-cp313-cp313t-win32.whl", hash = "sha256:fea80f4f4cf83b54c3a051f2f727870ee51e22f0248d3114b8e755d160b38cfb", size = 6413135, upload-time = "2025-10-15T16:16:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/43/70/aba4c38e8400abcc2f345e13d972fb36c26409b3e644366db7649015f291/numpy-2.3.4-cp313-cp313t-win_amd64.whl", hash = "sha256:15eea9f306b98e0be91eb344a94c0e630689ef302e10c2ce5f7e11905c704f9c", size = 12928582, upload-time = "2025-10-15T16:16:57.943Z" }, - { url = "https://files.pythonhosted.org/packages/67/63/871fad5f0073fc00fbbdd7232962ea1ac40eeaae2bba66c76214f7954236/numpy-2.3.4-cp313-cp313t-win_arm64.whl", hash = "sha256:b6c231c9c2fadbae4011ca5e7e83e12dc4a5072f1a1d85a0a7b3ed754d145a40", size = 10266691, upload-time = "2025-10-15T16:17:00.048Z" }, +version = "2.2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", 
hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 
6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = 
"2.27.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, +] + +[[package]] +name = "opencv-python" +version = "4.12.0.88" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" 
}, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/71/25c98e634b6bdeca4727c7f6d6927b056080668c5008ad3c8fc9e7f8f6ec/opencv-python-4.12.0.88.tar.gz", hash = "sha256:8b738389cede219405f6f3880b851efa3415ccd674752219377353f017d2994d", size = 95373294, upload-time = "2025-07-07T09:20:52.389Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/68/3da40142e7c21e9b1d4e7ddd6c58738feb013203e6e4b803d62cdd9eb96b/opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:f9a1f08883257b95a5764bf517a32d75aec325319c8ed0f89739a57fae9e92a5", size = 37877727, upload-time = "2025-07-07T09:13:31.47Z" }, + { url = "https://files.pythonhosted.org/packages/33/7c/042abe49f58d6ee7e1028eefc3334d98ca69b030e3b567fe245a2b28ea6f/opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:812eb116ad2b4de43ee116fcd8991c3a687f099ada0b04e68f64899c09448e81", size = 57326471, upload-time = "2025-07-07T09:13:41.26Z" }, + { url = "https://files.pythonhosted.org/packages/62/3a/440bd64736cf8116f01f3b7f9f2e111afb2e02beb2ccc08a6458114a6b5d/opencv_python-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:51fd981c7df6af3e8f70b1556696b05224c4e6b6777bdd2a46b3d4fb09de1a92", size = 45887139, upload-time = "2025-07-07T09:13:50.761Z" }, + { url = "https://files.pythonhosted.org/packages/68/1f/795e7f4aa2eacc59afa4fb61a2e35e510d06414dd5a802b51a012d691b37/opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:092c16da4c5a163a818f120c22c5e4a2f96e0db4f24e659c701f1fe629a690f9", size = 67041680, upload-time = "2025-07-07T09:14:01.995Z" }, + { url = "https://files.pythonhosted.org/packages/02/96/213fea371d3cb2f1d537612a105792aa0a6659fb2665b22cad709a75bd94/opencv_python-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:ff554d3f725b39878ac6a2e1fa232ec509c36130927afc18a1719ebf4fbf4357", size = 30284131, upload-time = "2025-07-07T09:14:08.819Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/80/eb88edc2e2b11cd2dd2e56f1c80b5784d11d6e6b7f04a1145df64df40065/opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:d98edb20aa932fd8ebd276a72627dad9dc097695b3d435a4257557bbb49a79d2", size = 39000307, upload-time = "2025-07-07T09:14:16.641Z" }, +] + +[[package]] +name = "outlines" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "airportsdata" }, + { name = "cloudpickle" }, + { name = "diskcache" }, + { name = "genson" }, + { name = "interegular" }, + { name = "iso3166" }, + { name = "jinja2" }, + { name = "jsonpath-ng" }, + { name = "jsonschema" }, + { name = "lark" }, + { name = "nest-asyncio" }, + { name = "outlines-core" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "referencing" }, + { name = "requests" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/9b/eea214f307589902d5d2d5ef078fc922f57f6f7dce932dcf784294077236/outlines-1.1.1.tar.gz", hash = "sha256:a417e3fefa985fdba86f3b47d6ea0ca8b39ddc0ee85f8979934ea0c767f6ec40", size = 2822241, upload-time = "2025-07-11T12:13:52.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/c2/042c79783a5bfdf2ccfef15480e4fdbf59e90cd78a861911a8bcf86e7f27/outlines-1.1.1-py3-none-any.whl", hash = "sha256:5b46b90d2ccf1aae76bb8061eab9376fff2309b278c0547c758a6f0f2702d867", size = 100326, upload-time = "2025-07-11T12:13:51.06Z" }, +] + +[[package]] +name = "outlines-core" +version = "0.1.26" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "interegular" }, + { name = "jsonschema" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/f3/274d07f4702728b43581235a77e545ec602b25f9b0098b288a0f3052521d/outlines_core-0.1.26.tar.gz", hash = "sha256:481c4301341e77cc8f1832d616784adb4d461b4fec65878e7c0d2cba7163a189", size = 75139, upload-time = "2024-12-12T23:38:50.703Z" } 
+ [[package]] name = "packaging" version = "25.0" @@ -314,6 +913,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pandas" +version = "2.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671, upload-time = "2025-09-29T23:21:05.024Z" }, + { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807, upload-time = "2025-09-29T23:21:15.979Z" }, + { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872, upload-time = "2025-09-29T23:21:27.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371, upload-time = "2025-09-29T23:21:40.532Z" }, + { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333, upload-time = "2025-09-29T23:21:55.77Z" }, + { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120, upload-time = "2025-09-29T23:22:10.109Z" }, + { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991, upload-time = "2025-09-29T23:25:04.889Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227, upload-time = "2025-09-29T23:22:24.343Z" }, + { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056, upload-time = "2025-09-29T23:22:37.762Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189, upload-time = "2025-09-29T23:22:51.688Z" }, + { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912, upload-time = "2025-09-29T23:23:05.042Z" }, + { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160, upload-time = "2025-09-29T23:23:28.57Z" }, + { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233, upload-time = "2025-09-29T23:24:24.876Z" }, +] + [[package]] name = "pathspec" version = "0.12.1" @@ -323,6 +949,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, ] +[[package]] +name = "pillow" +version = "12.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/02/d52c733a2452ef1ffcc123b68e6606d07276b0e358db70eabad7e40042b7/pillow-12.1.0.tar.gz", hash = 
"sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9", size = 46977283, upload-time = "2026-01-02T09:13:29.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/c7/2530a4aa28248623e9d7f27316b42e27c32ec410f695929696f2e0e4a778/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1", size = 4062543, upload-time = "2026-01-02T09:11:31.566Z" }, + { url = "https://files.pythonhosted.org/packages/8f/1f/40b8eae823dc1519b87d53c30ed9ef085506b05281d313031755c1705f73/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179", size = 4138373, upload-time = "2026-01-02T09:11:33.367Z" }, + { url = "https://files.pythonhosted.org/packages/d4/77/6fa60634cf06e52139fd0e89e5bbf055e8166c691c42fb162818b7fda31d/pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0", size = 3601241, upload-time = "2026-01-02T09:11:35.011Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/28ab865de622e14b747f0cd7877510848252d950e43002e224fb1c9ababf/pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587", size = 5262410, upload-time = "2026-01-02T09:11:36.682Z" }, + { url = "https://files.pythonhosted.org/packages/1c/34/583420a1b55e715937a85bd48c5c0991598247a1fd2eb5423188e765ea02/pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac", size = 4657312, upload-time = "2026-01-02T09:11:38.535Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fd/f5a0896839762885b3376ff04878f86ab2b097c2f9a9cdccf4eda8ba8dc0/pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b", size = 6232605, upload-time = "2026-01-02T09:11:40.602Z" }, + { url = "https://files.pythonhosted.org/packages/98/aa/938a09d127ac1e70e6ed467bd03834350b33ef646b31edb7452d5de43792/pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea", size = 8041617, upload-time = "2026-01-02T09:11:42.721Z" }, + { url = "https://files.pythonhosted.org/packages/17/e8/538b24cb426ac0186e03f80f78bc8dc7246c667f58b540bdd57c71c9f79d/pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c", size = 6346509, upload-time = "2026-01-02T09:11:44.955Z" }, + { url = "https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc", size = 7038117, upload-time = "2026-01-02T09:11:46.736Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a2/d40308cf86eada842ca1f3ffa45d0ca0df7e4ab33c83f81e73f5eaed136d/pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644", size = 6460151, upload-time = "2026-01-02T09:11:48.625Z" }, + { url = "https://files.pythonhosted.org/packages/f1/88/f5b058ad6453a085c5266660a1417bdad590199da1b32fb4efcff9d33b05/pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c", size = 7164534, upload-time = "2026-01-02T09:11:50.445Z" }, + { url = "https://files.pythonhosted.org/packages/19/ce/c17334caea1db789163b5d855a5735e47995b0b5dc8745e9a3605d5f24c0/pillow-12.1.0-cp313-cp313-win32.whl", hash = 
"sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171", size = 6332551, upload-time = "2026-01-02T09:11:52.234Z" }, + { url = "https://files.pythonhosted.org/packages/e5/07/74a9d941fa45c90a0d9465098fe1ec85de3e2afbdc15cc4766622d516056/pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a", size = 7040087, upload-time = "2026-01-02T09:11:54.822Z" }, + { url = "https://files.pythonhosted.org/packages/88/09/c99950c075a0e9053d8e880595926302575bc742b1b47fe1bbcc8d388d50/pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45", size = 2452470, upload-time = "2026-01-02T09:11:56.522Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ba/970b7d85ba01f348dee4d65412476321d40ee04dcb51cd3735b9dc94eb58/pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d", size = 5264816, upload-time = "2026-01-02T09:11:58.227Z" }, + { url = "https://files.pythonhosted.org/packages/10/60/650f2fb55fdba7a510d836202aa52f0baac633e50ab1cf18415d332188fb/pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0", size = 4660472, upload-time = "2026-01-02T09:12:00.798Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/5273a99478956a099d533c4f46cbaa19fd69d606624f4334b85e50987a08/pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554", size = 6268974, upload-time = "2026-01-02T09:12:02.572Z" }, + { url = "https://files.pythonhosted.org/packages/b4/26/0bf714bc2e73d5267887d47931d53c4ceeceea6978148ed2ab2a4e6463c4/pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e", size = 8073070, 
upload-time = "2026-01-02T09:12:04.75Z" }, + { url = "https://files.pythonhosted.org/packages/43/cf/1ea826200de111a9d65724c54f927f3111dc5ae297f294b370a670c17786/pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82", size = 6380176, upload-time = "2026-01-02T09:12:06.626Z" }, + { url = "https://files.pythonhosted.org/packages/03/e0/7938dd2b2013373fd85d96e0f38d62b7a5a262af21ac274250c7ca7847c9/pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4", size = 7067061, upload-time = "2026-01-02T09:12:08.624Z" }, + { url = "https://files.pythonhosted.org/packages/86/ad/a2aa97d37272a929a98437a8c0ac37b3cf012f4f8721e1bd5154699b2518/pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0", size = 6491824, upload-time = "2026-01-02T09:12:10.488Z" }, + { url = "https://files.pythonhosted.org/packages/a4/44/80e46611b288d51b115826f136fb3465653c28f491068a72d3da49b54cd4/pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b", size = 7190911, upload-time = "2026-01-02T09:12:12.772Z" }, + { url = "https://files.pythonhosted.org/packages/86/77/eacc62356b4cf81abe99ff9dbc7402750044aed02cfd6a503f7c6fc11f3e/pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65", size = 6336445, upload-time = "2026-01-02T09:12:14.775Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3c/57d81d0b74d218706dafccb87a87ea44262c43eef98eb3b164fd000e0491/pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0", size = 7045354, upload-time = "2026-01-02T09:12:16.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/82/8b9b97bba2e3576a340f93b044a3a3a09841170ab4c1eb0d5c93469fd32f/pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8", size = 2454547, upload-time = "2026-01-02T09:12:18.704Z" }, +] + [[package]] name = "platformdirs" version = "4.5.0" @@ -332,6 +991,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, ] +[[package]] +name = "ply" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130, upload-time = "2018-02-15T19:01:31.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, +] + +[[package]] +name = "propcache" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/df/6d9c1b6ac12b003837dde8a10231a7344512186e87b36e855bef32241942/propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf", size = 77750, upload-time = "2025-10-08T19:47:07.648Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/677a0025e8a2acf07d3418a2e7ba529c9c33caf09d3c1f25513023c1db56/propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311", size = 44780, upload-time = "2025-10-08T19:47:08.851Z" }, + { url = "https://files.pythonhosted.org/packages/89/a4/92380f7ca60f99ebae761936bc48a72a639e8a47b29050615eef757cb2a7/propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74", size = 46308, upload-time = "2025-10-08T19:47:09.982Z" }, + { url = "https://files.pythonhosted.org/packages/2d/48/c5ac64dee5262044348d1d78a5f85dd1a57464a60d30daee946699963eb3/propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe", size = 208182, upload-time = "2025-10-08T19:47:11.319Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0c/cd762dd011a9287389a6a3eb43aa30207bde253610cca06824aeabfe9653/propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af", size = 211215, upload-time = "2025-10-08T19:47:13.146Z" }, + { url = "https://files.pythonhosted.org/packages/30/3e/49861e90233ba36890ae0ca4c660e95df565b2cd15d4a68556ab5865974e/propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c", size = 218112, upload-time = "2025-10-08T19:47:14.913Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/8b/544bc867e24e1bd48f3118cecd3b05c694e160a168478fa28770f22fd094/propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f", size = 204442, upload-time = "2025-10-08T19:47:16.277Z" }, + { url = "https://files.pythonhosted.org/packages/50/a6/4282772fd016a76d3e5c0df58380a5ea64900afd836cec2c2f662d1b9bb3/propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1", size = 199398, upload-time = "2025-10-08T19:47:17.962Z" }, + { url = "https://files.pythonhosted.org/packages/3e/ec/d8a7cd406ee1ddb705db2139f8a10a8a427100347bd698e7014351c7af09/propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24", size = 196920, upload-time = "2025-10-08T19:47:19.355Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/f38ab64af3764f431e359f8baf9e0a21013e24329e8b85d2da32e8ed07ca/propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa", size = 203748, upload-time = "2025-10-08T19:47:21.338Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e3/fa846bd70f6534d647886621388f0a265254d30e3ce47e5c8e6e27dbf153/propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61", size = 205877, upload-time = "2025-10-08T19:47:23.059Z" }, + { url = "https://files.pythonhosted.org/packages/e2/39/8163fc6f3133fea7b5f2827e8eba2029a0277ab2c5beee6c1db7b10fc23d/propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66", size = 199437, upload-time = "2025-10-08T19:47:24.445Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/89/caa9089970ca49c7c01662bd0eeedfe85494e863e8043565aeb6472ce8fe/propcache-0.4.1-cp313-cp313-win32.whl", hash = "sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81", size = 37586, upload-time = "2025-10-08T19:47:25.736Z" }, + { url = "https://files.pythonhosted.org/packages/f5/ab/f76ec3c3627c883215b5c8080debb4394ef5a7a29be811f786415fc1e6fd/propcache-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e", size = 40790, upload-time = "2025-10-08T19:47:26.847Z" }, + { url = "https://files.pythonhosted.org/packages/59/1b/e71ae98235f8e2ba5004d8cb19765a74877abf189bc53fc0c80d799e56c3/propcache-0.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1", size = 37158, upload-time = "2025-10-08T19:47:27.961Z" }, + { url = "https://files.pythonhosted.org/packages/83/ce/a31bbdfc24ee0dcbba458c8175ed26089cf109a55bbe7b7640ed2470cfe9/propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b", size = 81451, upload-time = "2025-10-08T19:47:29.445Z" }, + { url = "https://files.pythonhosted.org/packages/25/9c/442a45a470a68456e710d96cacd3573ef26a1d0a60067e6a7d5e655621ed/propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566", size = 46374, upload-time = "2025-10-08T19:47:30.579Z" }, + { url = "https://files.pythonhosted.org/packages/f4/bf/b1d5e21dbc3b2e889ea4327044fb16312a736d97640fb8b6aa3f9c7b3b65/propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835", size = 48396, upload-time = "2025-10-08T19:47:31.79Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/04/5b4c54a103d480e978d3c8a76073502b18db0c4bc17ab91b3cb5092ad949/propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e", size = 275950, upload-time = "2025-10-08T19:47:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/b4/c1/86f846827fb969c4b78b0af79bba1d1ea2156492e1b83dea8b8a6ae27395/propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859", size = 273856, upload-time = "2025-10-08T19:47:34.906Z" }, + { url = "https://files.pythonhosted.org/packages/36/1d/fc272a63c8d3bbad6878c336c7a7dea15e8f2d23a544bda43205dfa83ada/propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b", size = 280420, upload-time = "2025-10-08T19:47:36.338Z" }, + { url = "https://files.pythonhosted.org/packages/07/0c/01f2219d39f7e53d52e5173bcb09c976609ba30209912a0680adfb8c593a/propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0", size = 263254, upload-time = "2025-10-08T19:47:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/2d/18/cd28081658ce597898f0c4d174d4d0f3c5b6d4dc27ffafeef835c95eb359/propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af", size = 261205, upload-time = "2025-10-08T19:47:39.659Z" }, + { url = "https://files.pythonhosted.org/packages/7a/71/1f9e22eb8b8316701c2a19fa1f388c8a3185082607da8e406a803c9b954e/propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = 
"sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393", size = 247873, upload-time = "2025-10-08T19:47:41.084Z" }, + { url = "https://files.pythonhosted.org/packages/4a/65/3d4b61f36af2b4eddba9def857959f1016a51066b4f1ce348e0cf7881f58/propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874", size = 262739, upload-time = "2025-10-08T19:47:42.51Z" }, + { url = "https://files.pythonhosted.org/packages/2a/42/26746ab087faa77c1c68079b228810436ccd9a5ce9ac85e2b7307195fd06/propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7", size = 263514, upload-time = "2025-10-08T19:47:43.927Z" }, + { url = "https://files.pythonhosted.org/packages/94/13/630690fe201f5502d2403dd3cfd451ed8858fe3c738ee88d095ad2ff407b/propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1", size = 257781, upload-time = "2025-10-08T19:47:45.448Z" }, + { url = "https://files.pythonhosted.org/packages/92/f7/1d4ec5841505f423469efbfc381d64b7b467438cd5a4bbcbb063f3b73d27/propcache-0.4.1-cp313-cp313t-win32.whl", hash = "sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717", size = 41396, upload-time = "2025-10-08T19:47:47.202Z" }, + { url = "https://files.pythonhosted.org/packages/48/f0/615c30622316496d2cbbc29f5985f7777d3ada70f23370608c1d3e081c1f/propcache-0.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37", size = 44897, upload-time = "2025-10-08T19:47:48.336Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ca/6002e46eccbe0e33dcd4069ef32f7f1c9e243736e07adca37ae8c4830ec3/propcache-0.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a", size = 39789, upload-time = "2025-10-08T19:47:49.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, +] + [[package]] name = "protobuf" version = "6.33.0" @@ -347,6 +1054,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/d1/0a28c21707807c6aacd5dc9c3704b2aa1effbf37adebd8caeaf68b17a636/protobuf-6.33.0-py3-none-any.whl", hash = "sha256:25c9e1963c6734448ea2d308cfa610e692b801304ba0908d7bfa564ac5132995", size = 170477, upload-time = "2025-10-15T20:39:51.311Z" }, ] +[[package]] +name = "psutil" +version = "7.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/cb/09e5184fb5fc0358d110fc3ca7f6b1d033800734d34cac10f4136cfac10e/psutil-7.2.1.tar.gz", hash = "sha256:f7583aec590485b43ca601dd9cea0dcd65bd7bb21d30ef4ddbf4ea6b5ed1bdd3", size = 490253, upload-time = "2025-12-29T08:26:00.169Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/8e/f0c242053a368c2aa89584ecd1b054a18683f13d6e5a318fc9ec36582c94/psutil-7.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ba9f33bb525b14c3ea563b2fd521a84d2fa214ec59e3e6a2858f78d0844dd60d", size = 129624, upload-time = "2025-12-29T08:26:04.255Z" }, + { url = "https://files.pythonhosted.org/packages/26/97/a58a4968f8990617decee234258a2b4fc7cd9e35668387646c1963e69f26/psutil-7.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81442dac7abfc2f4f4385ea9e12ddf5a796721c0f6133260687fec5c3780fa49", size = 130132, upload-time = "2025-12-29T08:26:06.228Z" }, + { url = "https://files.pythonhosted.org/packages/db/6d/ed44901e830739af5f72a85fa7ec5ff1edea7f81bfbf4875e409007149bd/psutil-7.2.1-cp313-cp313t-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea46c0d060491051d39f0d2cff4f98d5c72b288289f57a21556cc7d504db37fc", size = 180612, 
upload-time = "2025-12-29T08:26:08.276Z" }, + { url = "https://files.pythonhosted.org/packages/c7/65/b628f8459bca4efbfae50d4bf3feaab803de9a160b9d5f3bd9295a33f0c2/psutil-7.2.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35630d5af80d5d0d49cfc4d64c1c13838baf6717a13effb35869a5919b854cdf", size = 183201, upload-time = "2025-12-29T08:26:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/fb/23/851cadc9764edcc18f0effe7d0bf69f727d4cf2442deb4a9f78d4e4f30f2/psutil-7.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:923f8653416604e356073e6e0bccbe7c09990acef442def2f5640dd0faa9689f", size = 139081, upload-time = "2025-12-29T08:26:12.483Z" }, + { url = "https://files.pythonhosted.org/packages/59/82/d63e8494ec5758029f31c6cb06d7d161175d8281e91d011a4a441c8a43b5/psutil-7.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cfbe6b40ca48019a51827f20d830887b3107a74a79b01ceb8cc8de4ccb17b672", size = 134767, upload-time = "2025-12-29T08:26:14.528Z" }, + { url = "https://files.pythonhosted.org/packages/c5/cf/5180eb8c8bdf6a503c6919f1da28328bd1e6b3b1b5b9d5b01ae64f019616/psutil-7.2.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b2e953fcfaedcfbc952b44744f22d16575d3aa78eb4f51ae74165b4e96e55f42", size = 128137, upload-time = "2025-12-29T08:26:27.759Z" }, + { url = "https://files.pythonhosted.org/packages/c5/2c/78e4a789306a92ade5000da4f5de3255202c534acdadc3aac7b5458fadef/psutil-7.2.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:05cc68dbb8c174828624062e73078e7e35406f4ca2d0866c272c2410d8ef06d1", size = 128947, upload-time = "2025-12-29T08:26:29.548Z" }, + { url = "https://files.pythonhosted.org/packages/29/f8/40e01c350ad9a2b3cb4e6adbcc8a83b17ee50dd5792102b6142385937db5/psutil-7.2.1-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e38404ca2bb30ed7267a46c02f06ff842e92da3bb8c5bfdadbd35a5722314d8", size = 154694, upload-time = "2025-12-29T08:26:32.147Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/e4/b751cdf839c011a9714a783f120e6a86b7494eb70044d7d81a25a5cd295f/psutil-7.2.1-cp36-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab2b98c9fc19f13f59628d94df5cc4cc4844bc572467d113a8b517d634e362c6", size = 156136, upload-time = "2025-12-29T08:26:34.079Z" }, + { url = "https://files.pythonhosted.org/packages/44/ad/bbf6595a8134ee1e94a4487af3f132cef7fce43aef4a93b49912a48c3af7/psutil-7.2.1-cp36-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f78baafb38436d5a128f837fab2d92c276dfb48af01a240b861ae02b2413ada8", size = 148108, upload-time = "2025-12-29T08:26:36.225Z" }, + { url = "https://files.pythonhosted.org/packages/1c/15/dd6fd869753ce82ff64dcbc18356093471a5a5adf4f77ed1f805d473d859/psutil-7.2.1-cp36-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:99a4cd17a5fdd1f3d014396502daa70b5ec21bf4ffe38393e152f8e449757d67", size = 147402, upload-time = "2025-12-29T08:26:39.21Z" }, + { url = "https://files.pythonhosted.org/packages/34/68/d9317542e3f2b180c4306e3f45d3c922d7e86d8ce39f941bb9e2e9d8599e/psutil-7.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:b1b0671619343aa71c20ff9767eced0483e4fc9e1f489d50923738caf6a03c17", size = 136938, upload-time = "2025-12-29T08:26:41.036Z" }, + { url = "https://files.pythonhosted.org/packages/3e/73/2ce007f4198c80fcf2cb24c169884f833fe93fbc03d55d302627b094ee91/psutil-7.2.1-cp37-abi3-win_arm64.whl", hash = "sha256:0d67c1822c355aa6f7314d92018fb4268a76668a536f133599b91edd48759442", size = 133836, upload-time = "2025-12-29T08:26:43.086Z" }, +] + +[[package]] +name = "pyarrow" +version = "22.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" }, + { url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" }, + { url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" }, + { url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" }, + { url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" }, + { url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" }, + { url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" }, + { url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" }, + { url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" }, + { url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" }, +] + +[[package]] +name = "pycparser" +version = "2.23" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, +] + [[package]] name = "pydantic" version = "2.12.3" @@ -392,6 +1152,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/de/b20f4ab954d6d399499c33ec4fafc46d9551e11dc1858fb7f5dca0748ceb/pydantic_core-2.41.4-cp313-cp313t-win_arm64.whl", hash = "sha256:19f3684868309db5263a11bace3c45d93f6f24afa2ffe75a647583df22a2ff89", size = 1970034, upload-time = "2025-10-14T10:21:30.869Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + [[package]] name = "pytokens" version = "0.2.0" @@ -401,6 +1173,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/5a/c269ea6b348b6f2c32686635df89f32dbe05df1088dd4579302a6f8f99af/pytokens-0.2.0-py3-none-any.whl", hash = "sha256:74d4b318c67f4295c13782ddd9abcb7e297ec5630ad060eb90abf7ebbefe59f8", size = 12038, upload-time = "2025-10-15T08:02:41.694Z" }, ] +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -419,6 +1200,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", 
size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, ] +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + [[package]] name = "regex" version = "2025.9.18" @@ -470,6 +1264,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, + { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = 
"https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, +] + [[package]] name = "safetensors" version = "0.6.2" @@ -492,6 +1323,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" }, ] +[[package]] +name = "sentencepiece" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = 
"2025-08-12T07:00:51.718Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/4a/85fbe1706d4d04a7e826b53f327c4b80f849cf1c7b7c5e31a20a97d8f28b/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dcd8161eee7b41aae57ded06272905dbd680a0a04b91edd0f64790c796b2f706", size = 1943150, upload-time = "2025-08-12T06:59:53.588Z" }, + { url = "https://files.pythonhosted.org/packages/c2/83/4cfb393e287509fc2155480b9d184706ef8d9fa8cbf5505d02a5792bf220/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c6c8f42949f419ff8c7e9960dbadcfbc982d7b5efc2f6748210d3dd53a7de062", size = 1325651, upload-time = "2025-08-12T06:59:55.073Z" }, + { url = "https://files.pythonhosted.org/packages/8d/de/5a007fb53b1ab0aafc69d11a5a3dd72a289d5a3e78dcf2c3a3d9b14ffe93/sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff", size = 1253641, upload-time = "2025-08-12T06:59:56.562Z" }, + { url = "https://files.pythonhosted.org/packages/2c/d2/f552be5928105588f4f4d66ee37dd4c61460d8097e62d0e2e0eec41bc61d/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820", size = 1316271, upload-time = "2025-08-12T06:59:58.109Z" }, + { url = "https://files.pythonhosted.org/packages/96/df/0cfe748ace5485be740fed9476dee7877f109da32ed0d280312c94ec259f/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47", size = 1387882, upload-time = "2025-08-12T07:00:00.701Z" }, + { url = "https://files.pythonhosted.org/packages/ac/dd/f7774d42a881ced8e1739f393ab1e82ece39fc9abd4779e28050c2e975b5/sentencepiece-0.2.1-cp313-cp313-win32.whl", hash = "sha256:92b3816aa2339355fda2c8c4e021a5de92180b00aaccaf5e2808972e77a4b22f", size = 999541, upload-time = "2025-08-12T07:00:02.709Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/e9/932b9eae6fd7019548321eee1ab8d5e3b3d1294df9d9a0c9ac517c7b636d/sentencepiece-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:10ed3dab2044c47f7a2e7b4969b0c430420cdd45735d78c8f853191fa0e3148b", size = 1054669, upload-time = "2025-08-12T07:00:04.915Z" }, + { url = "https://files.pythonhosted.org/packages/c9/3a/76488a00ea7d6931689cda28726a1447d66bf1a4837943489314593d5596/sentencepiece-0.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac650534e2251083c5f75dde4ff28896ce7c8904133dc8fef42780f4d5588fcd", size = 1033922, upload-time = "2025-08-12T07:00:06.496Z" }, + { url = "https://files.pythonhosted.org/packages/4a/b6/08fe2ce819e02ccb0296f4843e3f195764ce9829cbda61b7513f29b95718/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94", size = 1946052, upload-time = "2025-08-12T07:00:08.136Z" }, + { url = "https://files.pythonhosted.org/packages/ab/d9/1ea0e740591ff4c6fc2b6eb1d7510d02f3fb885093f19b2f3abd1363b402/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07", size = 1327408, upload-time = "2025-08-12T07:00:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/99/7e/1fb26e8a21613f6200e1ab88824d5d203714162cf2883248b517deb500b7/sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c", size = 1254857, upload-time = "2025-08-12T07:00:11.021Z" }, + { url = "https://files.pythonhosted.org/packages/bc/85/c72fd1f3c7a6010544d6ae07f8ddb38b5e2a7e33bd4318f87266c0bbafbf/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596", size = 1315722, upload-time = "2025-08-12T07:00:12.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/e8/661e5bd82a8aa641fd6c1020bd0e890ef73230a2b7215ddf9c8cd8e941c2/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6", size = 1387452, upload-time = "2025-08-12T07:00:15.088Z" }, + { url = "https://files.pythonhosted.org/packages/99/5e/ae66c361023a470afcbc1fbb8da722c72ea678a2fcd9a18f1a12598c7501/sentencepiece-0.2.1-cp313-cp313t-win32.whl", hash = "sha256:89a3ea015517c42c0341d0d962f3e6aaf2cf10d71b1932d475c44ba48d00aa2b", size = 1002501, upload-time = "2025-08-12T07:00:16.966Z" }, + { url = "https://files.pythonhosted.org/packages/c1/03/d332828c4ff764e16c1b56c2c8f9a33488bbe796b53fb6b9c4205ddbf167/sentencepiece-0.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:33f068c9382dc2e7c228eedfd8163b52baa86bb92f50d0488bf2b7da7032e484", size = 1057555, upload-time = "2025-08-12T07:00:18.573Z" }, + { url = "https://files.pythonhosted.org/packages/88/14/5aee0bf0864df9bd82bd59e7711362908e4935e3f9cdc1f57246b5d5c9b9/sentencepiece-0.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:b3616ad246f360e52c85781e47682d31abfb6554c779e42b65333d4b5f44ecc0", size = 1036042, upload-time = "2025-08-12T07:00:20.209Z" }, +] + [[package]] name = "server" version = "0.1.0" @@ -501,6 +1356,14 @@ dependencies = [ { name = "fastapi" }, { name = "huggingface-hub" }, { name = "mlx-lm" }, + { name = "mlx-vlm" }, + { name = "outlines" }, + { name = "outlines-core" }, + { name = "pillow" }, + { name = "psutil" }, + { name = "sentencepiece" }, + { name = "torch" }, + { name = "transformers" }, { name = "uvicorn" }, ] @@ -510,9 +1373,35 @@ requires-dist = [ { name = "fastapi" }, { name = "huggingface-hub", specifier = ">=0.34.0" }, { name = "mlx-lm" }, + { name = "mlx-vlm" }, + { name = "outlines", specifier = "==1.1.1" }, + { name = "outlines-core", specifier = "==0.1.26" }, + { name = "pillow" }, + { name = "psutil" }, + { name = "sentencepiece" }, + { name = 
"torch" }, + { name = "transformers" }, { name = "uvicorn" }, ] +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -522,6 +1411,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "soundfile" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, + { name = "numpy" 
}, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/28/e2a36573ccbcf3d57c00626a21fe51989380636e821b341d36ccca0c1c3a/soundfile-0.13.1-py2.py3-none-any.whl", hash = "sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445", size = 25751, upload-time = "2025-01-25T09:16:44.235Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ab/73e97a5b3cc46bba7ff8650a1504348fa1863a6f9d57d7001c6b67c5f20e/soundfile-0.13.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33", size = 1142250, upload-time = "2025-01-25T09:16:47.583Z" }, + { url = "https://files.pythonhosted.org/packages/a0/e5/58fd1a8d7b26fc113af244f966ee3aecf03cb9293cb935daaddc1e455e18/soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593", size = 1101406, upload-time = "2025-01-25T09:16:49.662Z" }, + { url = "https://files.pythonhosted.org/packages/58/ae/c0e4a53d77cf6e9a04179535766b3321b0b9ced5f70522e4caf9329f0046/soundfile-0.13.1-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb", size = 1235729, upload-time = "2025-01-25T09:16:53.018Z" }, + { url = "https://files.pythonhosted.org/packages/57/5e/70bdd9579b35003a489fc850b5047beeda26328053ebadc1fb60f320f7db/soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618", size = 1313646, upload-time = "2025-01-25T09:16:54.872Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/df/8c11dc4dfceda14e3003bb81a0d0edcaaf0796dd7b4f826ea3e532146bba/soundfile-0.13.1-py2.py3-none-win32.whl", hash = "sha256:c734564fab7c5ddf8e9be5bf70bab68042cd17e9c214c06e365e20d64f9a69d5", size = 899881, upload-time = "2025-01-25T09:16:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/14/e9/6b761de83277f2f02ded7e7ea6f07828ec78e4b229b80e4ca55dd205b9dc/soundfile-0.13.1-py2.py3-none-win_amd64.whl", hash = "sha256:1e70a05a0626524a69e9f0f4dd2ec174b4e9567f4d8b6c11d38b5c289be36ee9", size = 1019162, upload-time = "2025-01-25T09:16:59.573Z" }, +] + [[package]] name = "starlette" version = "0.48.0" @@ -534,6 +1442,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736, upload-time = "2025-09-13T08:41:03.869Z" }, ] +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + [[package]] name = "tokenizers" version = "0.22.1" @@ -559,6 +1479,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/46/e33a8c93907b631a99377ef4c5f817ab453d0b34f93529421f42ff559671/tokenizers-0.22.1-cp39-abi3-win_amd64.whl", hash = 
"sha256:65fd6e3fb11ca1e78a6a93602490f134d1fdeb13bcef99389d5102ea318ed138", size = 2674684, upload-time = "2025-09-19T09:49:24.953Z" }, ] +[[package]] +name = "torch" +version = "2.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools" }, + { name = "sympy" }, + { name = "triton", 
marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/60/8fc5e828d050bddfab469b3fe78e5ab9a7e53dda9c3bdc6a43d17ce99e63/torch-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c29455d2b910b98738131990394da3e50eea8291dfeb4b12de71ecf1fdeb21cb", size = 104135743, upload-time = "2025-11-12T15:21:34.936Z" }, + { url = "https://files.pythonhosted.org/packages/f2/b7/6d3f80e6918213babddb2a37b46dbb14c15b14c5f473e347869a51f40e1f/torch-2.9.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:524de44cd13931208ba2c4bde9ec7741fd4ae6bfd06409a604fc32f6520c2bc9", size = 899749493, upload-time = "2025-11-12T15:24:36.356Z" }, + { url = "https://files.pythonhosted.org/packages/a6/47/c7843d69d6de8938c1cbb1eba426b1d48ddf375f101473d3e31a5fc52b74/torch-2.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:545844cc16b3f91e08ce3b40e9c2d77012dd33a48d505aed34b7740ed627a1b2", size = 110944162, upload-time = "2025-11-12T15:21:53.151Z" }, + { url = "https://files.pythonhosted.org/packages/28/0e/2a37247957e72c12151b33a01e4df651d9d155dd74d8cfcbfad15a79b44a/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5be4bf7496f1e3ffb1dd44b672adb1ac3f081f204c5ca81eba6442f5f634df8e", size = 74830751, upload-time = "2025-11-12T15:21:43.792Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f7/7a18745edcd7b9ca2381aa03353647bca8aace91683c4975f19ac233809d/torch-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:30a3e170a84894f3652434b56d59a64a2c11366b0ed5776fab33c2439396bf9a", size = 104142929, upload-time = "2025-11-12T15:21:48.319Z" }, + { url = "https://files.pythonhosted.org/packages/f4/dd/f1c0d879f2863ef209e18823a988dc7a1bf40470750e3ebe927efdb9407f/torch-2.9.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8301a7b431e51764629208d0edaa4f9e4c33e6df0f2f90b90e261d623df6a4e2", size = 899748978, upload-time = "2025-11-12T15:23:04.568Z" }, + { url 
= "https://files.pythonhosted.org/packages/1f/9f/6986b83a53b4d043e36f3f898b798ab51f7f20fdf1a9b01a2720f445043d/torch-2.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2e1c42c0ae92bf803a4b2409fdfed85e30f9027a66887f5e7dcdbc014c7531db", size = 111176995, upload-time = "2025-11-12T15:22:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/40/60/71c698b466dd01e65d0e9514b5405faae200c52a76901baf6906856f17e4/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:2c14b3da5df416cf9cb5efab83aa3056f5b8cd8620b8fde81b4987ecab730587", size = 74480347, upload-time = "2025-11-12T15:21:57.648Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -592,6 +1552,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/d3/c16c3b3cf7655a67db1144da94b021c200ac1303f82428f2beef6c2e72bb/transformers-4.57.1-py3-none-any.whl", hash = "sha256:b10d05da8fa67dc41644dbbf9bc45a44cb86ae33da6f9295f5fbf5b7890bd267", size = 11990925, upload-time = "2025-10-14T15:39:23.085Z" }, ] +[[package]] +name = "triton" +version = "3.5.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/46/8c3bbb5b0a19313f50edcaa363b599e5a1a5ac9683ead82b9b80fe497c8d/triton-3.5.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3f4346b6ebbd4fad18773f5ba839114f4826037c9f2f34e0148894cd5dd3dba", size = 170470410, upload-time = "2025-11-11T17:41:06.319Z" }, + { url = "https://files.pythonhosted.org/packages/37/92/e97fcc6b2c27cdb87ce5ee063d77f8f26f19f06916aa680464c8104ef0f6/triton-3.5.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0b4d2c70127fca6a23e247f9348b8adde979d2e7a20391bfbabaac6aebc7e6a8", size = 170579924, upload-time = "2025-11-11T17:41:12.455Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -613,6 +1582,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] +[[package]] +name = "tzdata" +version = "2025.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" }, +] + [[package]] name = "urllib3" version = "2.5.0" @@ -634,3 +1612,87 @@ sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef468 wheels = [ { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, ] + +[[package]] +name = "xxhash" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738, upload-time = "2025-10-02T14:34:55.839Z" }, + { url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821, upload-time = "2025-10-02T14:34:57.219Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127, upload-time = "2025-10-02T14:34:59.21Z" }, + { url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975, upload-time = "2025-10-02T14:35:00.816Z" }, + { url = "https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload-time = "2025-10-02T14:35:02.207Z" }, + { url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload-time = 
"2025-10-02T14:35:03.61Z" }, + { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload-time = "2025-10-02T14:35:05.013Z" }, + { url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440, upload-time = "2025-10-02T14:35:06.239Z" }, + { url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990, upload-time = "2025-10-02T14:35:07.735Z" }, + { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload-time = "2025-10-02T14:35:09.438Z" }, + { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload-time = "2025-10-02T14:35:11.162Z" }, + { url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload-time = "2025-10-02T14:35:12.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/3a/6797e0114c21d1725e2577508e24006fd7ff1d8c0c502d3b52e45c1771d8/xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799", size = 30620, upload-time = "2025-10-02T14:35:14.129Z" }, + { url = "https://files.pythonhosted.org/packages/86/15/9bc32671e9a38b413a76d24722a2bf8784a132c043063a8f5152d390b0f9/xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392", size = 31542, upload-time = "2025-10-02T14:35:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/39/c5/cc01e4f6188656e56112d6a8e0dfe298a16934b8c47a247236549a3f7695/xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6", size = 27880, upload-time = "2025-10-02T14:35:16.315Z" }, + { url = "https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956, upload-time = "2025-10-02T14:35:17.413Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072, upload-time = "2025-10-02T14:35:18.844Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409, upload-time = "2025-10-02T14:35:20.31Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736, upload-time = "2025-10-02T14:35:21.616Z" }, + { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload-time = "2025-10-02T14:35:23.32Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload-time = "2025-10-02T14:35:25.111Z" }, + { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload-time = "2025-10-02T14:35:26.586Z" }, + { url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907, upload-time = "2025-10-02T14:35:28.087Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839, 
upload-time = "2025-10-02T14:35:29.857Z" }, + { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload-time = "2025-10-02T14:35:31.222Z" }, + { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload-time = "2025-10-02T14:35:32.517Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload-time = "2025-10-02T14:35:33.827Z" }, + { url = "https://files.pythonhosted.org/packages/19/fa/0172e350361d61febcea941b0cc541d6e6c8d65d153e85f850a7b256ff8a/xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec", size = 30916, upload-time = "2025-10-02T14:35:35.107Z" }, + { url = "https://files.pythonhosted.org/packages/ad/e6/e8cf858a2b19d6d45820f072eff1bea413910592ff17157cabc5f1227a16/xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8", size = 31799, upload-time = "2025-10-02T14:35:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/56/15/064b197e855bfb7b343210e82490ae672f8bc7cdf3ddb02e92f64304ee8a/xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746", size = 28044, upload-time = "2025-10-02T14:35:37.195Z" }, +] + +[[package]] +name = "yarl" +version = "1.22.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + 
{ name = "idna" }, + { name = "multidict" }, + { name = "propcache" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/f3/d67de7260456ee105dc1d162d43a019ecad6b91e2f51809d6cddaa56690e/yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53", size = 139980, upload-time = "2025-10-06T14:10:14.601Z" }, + { url = "https://files.pythonhosted.org/packages/01/88/04d98af0b47e0ef42597b9b28863b9060bb515524da0a65d5f4db160b2d5/yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a", size = 93424, upload-time = "2025-10-06T14:10:16.115Z" }, + { url = "https://files.pythonhosted.org/packages/18/91/3274b215fd8442a03975ce6bee5fe6aa57a8326b29b9d3d56234a1dca244/yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c", size = 93821, upload-time = "2025-10-06T14:10:17.993Z" }, + { url = "https://files.pythonhosted.org/packages/61/3a/caf4e25036db0f2da4ca22a353dfeb3c9d3c95d2761ebe9b14df8fc16eb0/yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601", size = 373243, upload-time = "2025-10-06T14:10:19.44Z" }, + { url = "https://files.pythonhosted.org/packages/6e/9e/51a77ac7516e8e7803b06e01f74e78649c24ee1021eca3d6a739cb6ea49c/yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a", size = 342361, upload-time = 
"2025-10-06T14:10:21.124Z" }, + { url = "https://files.pythonhosted.org/packages/d4/f8/33b92454789dde8407f156c00303e9a891f1f51a0330b0fad7c909f87692/yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df", size = 387036, upload-time = "2025-10-06T14:10:22.902Z" }, + { url = "https://files.pythonhosted.org/packages/d9/9a/c5db84ea024f76838220280f732970aa4ee154015d7f5c1bfb60a267af6f/yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2", size = 397671, upload-time = "2025-10-06T14:10:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/11/c9/cd8538dc2e7727095e0c1d867bad1e40c98f37763e6d995c1939f5fdc7b1/yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b", size = 377059, upload-time = "2025-10-06T14:10:26.406Z" }, + { url = "https://files.pythonhosted.org/packages/a1/b9/ab437b261702ced75122ed78a876a6dec0a1b0f5e17a4ac7a9a2482d8abe/yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273", size = 365356, upload-time = "2025-10-06T14:10:28.461Z" }, + { url = "https://files.pythonhosted.org/packages/b2/9d/8e1ae6d1d008a9567877b08f0ce4077a29974c04c062dabdb923ed98e6fe/yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a", size = 361331, upload-time = "2025-10-06T14:10:30.541Z" }, + { url = "https://files.pythonhosted.org/packages/ca/5a/09b7be3905962f145b73beb468cdd53db8aa171cf18c80400a54c5b82846/yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d", size = 382590, upload-time = 
"2025-10-06T14:10:33.352Z" }, + { url = "https://files.pythonhosted.org/packages/aa/7f/59ec509abf90eda5048b0bc3e2d7b5099dffdb3e6b127019895ab9d5ef44/yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02", size = 385316, upload-time = "2025-10-06T14:10:35.034Z" }, + { url = "https://files.pythonhosted.org/packages/e5/84/891158426bc8036bfdfd862fabd0e0fa25df4176ec793e447f4b85cf1be4/yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67", size = 374431, upload-time = "2025-10-06T14:10:37.76Z" }, + { url = "https://files.pythonhosted.org/packages/bb/49/03da1580665baa8bef5e8ed34c6df2c2aca0a2f28bf397ed238cc1bbc6f2/yarl-1.22.0-cp313-cp313-win32.whl", hash = "sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95", size = 81555, upload-time = "2025-10-06T14:10:39.649Z" }, + { url = "https://files.pythonhosted.org/packages/9a/ee/450914ae11b419eadd067c6183ae08381cfdfcb9798b90b2b713bbebddda/yarl-1.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d", size = 86965, upload-time = "2025-10-06T14:10:41.313Z" }, + { url = "https://files.pythonhosted.org/packages/98/4d/264a01eae03b6cf629ad69bae94e3b0e5344741e929073678e84bf7a3e3b/yarl-1.22.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b", size = 81205, upload-time = "2025-10-06T14:10:43.167Z" }, + { url = "https://files.pythonhosted.org/packages/88/fc/6908f062a2f77b5f9f6d69cecb1747260831ff206adcbc5b510aff88df91/yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10", size = 146209, upload-time = "2025-10-06T14:10:44.643Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/47/76594ae8eab26210b4867be6f49129861ad33da1f1ebdf7051e98492bf62/yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3", size = 95966, upload-time = "2025-10-06T14:10:46.554Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ce/05e9828a49271ba6b5b038b15b3934e996980dd78abdfeb52a04cfb9467e/yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9", size = 97312, upload-time = "2025-10-06T14:10:48.007Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c5/7dffad5e4f2265b29c9d7ec869c369e4223166e4f9206fc2243ee9eea727/yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f", size = 361967, upload-time = "2025-10-06T14:10:49.997Z" }, + { url = "https://files.pythonhosted.org/packages/50/b2/375b933c93a54bff7fc041e1a6ad2c0f6f733ffb0c6e642ce56ee3b39970/yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0", size = 323949, upload-time = "2025-10-06T14:10:52.004Z" }, + { url = "https://files.pythonhosted.org/packages/66/50/bfc2a29a1d78644c5a7220ce2f304f38248dc94124a326794e677634b6cf/yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e", size = 361818, upload-time = "2025-10-06T14:10:54.078Z" }, + { url = "https://files.pythonhosted.org/packages/46/96/f3941a46af7d5d0f0498f86d71275696800ddcdd20426298e572b19b91ff/yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708", size = 372626, upload-time = 
"2025-10-06T14:10:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/c1/42/8b27c83bb875cd89448e42cd627e0fb971fa1675c9ec546393d18826cb50/yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f", size = 341129, upload-time = "2025-10-06T14:10:57.985Z" }, + { url = "https://files.pythonhosted.org/packages/49/36/99ca3122201b382a3cf7cc937b95235b0ac944f7e9f2d5331d50821ed352/yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d", size = 346776, upload-time = "2025-10-06T14:10:59.633Z" }, + { url = "https://files.pythonhosted.org/packages/85/b4/47328bf996acd01a4c16ef9dcd2f59c969f495073616586f78cd5f2efb99/yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8", size = 334879, upload-time = "2025-10-06T14:11:01.454Z" }, + { url = "https://files.pythonhosted.org/packages/c2/ad/b77d7b3f14a4283bffb8e92c6026496f6de49751c2f97d4352242bba3990/yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5", size = 350996, upload-time = "2025-10-06T14:11:03.452Z" }, + { url = "https://files.pythonhosted.org/packages/81/c8/06e1d69295792ba54d556f06686cbd6a7ce39c22307100e3fb4a2c0b0a1d/yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f", size = 356047, upload-time = "2025-10-06T14:11:05.115Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b8/4c0e9e9f597074b208d18cef227d83aac36184bfbc6eab204ea55783dbc5/yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62", size = 342947, upload-time = "2025-10-06T14:11:08.137Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/e5/11f140a58bf4c6ad7aca69a892bff0ee638c31bea4206748fc0df4ebcb3a/yarl-1.22.0-cp313-cp313t-win32.whl", hash = "sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03", size = 86943, upload-time = "2025-10-06T14:11:10.284Z" }, + { url = "https://files.pythonhosted.org/packages/31/74/8b74bae38ed7fe6793d0c15a0c8207bbb819cf287788459e5ed230996cdd/yarl-1.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249", size = 93715, upload-time = "2025-10-06T14:11:11.739Z" }, + { url = "https://files.pythonhosted.org/packages/69/66/991858aa4b5892d57aef7ee1ba6b4d01ec3b7eb3060795d34090a3ca3278/yarl-1.22.0-cp313-cp313t-win_arm64.whl", hash = "sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b", size = 83857, upload-time = "2025-10-06T14:11:13.586Z" }, + { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, +]