lstein
diff --git a/docs/src/content/docs/features/External Models/alibabacloud.mdx b/docs/src/content/docs/features/External Models/alibabacloud.mdx
Lines changed: 3 additions & 3 deletions
diff --git a/docs/src/content/docs/features/External Models/gemini.mdx b/docs/src/content/docs/features/External Models/gemini.mdx
Lines changed: 5 additions & 7 deletions
diff --git a/docs/src/content/docs/features/External Models/index.mdx b/docs/src/content/docs/features/External Models/index.mdx
Lines changed: 4 additions & 2 deletions
diff --git a/docs/src/content/docs/features/External Models/openai.mdx b/docs/src/content/docs/features/External Models/openai.mdx
Lines changed: 11 additions & 6 deletions
diff --git a/invokeai/app/invocations/anima_model_loader.py b/invokeai/app/invocations/anima_model_loader.py
Lines changed: 4 additions & 20 deletions
diff --git a/invokeai/app/invocations/anima_text_encoder.py b/invokeai/app/invocations/anima_text_encoder.py
Lines changed: 14 additions & 19 deletions
diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py
Lines changed: 26 additions & 1 deletion
diff --git a/invokeai/backend/anima/t5_tokenizer.py b/invokeai/backend/anima/t5_tokenizer.py
Lines changed: 25 additions & 0 deletions
@@ -39,16 +39,16 @@ DashScope has separate international (`dashscope-intl.aliyuncs.com`) and China (
 | **Qwen Image 2.0 Pro** | txt2img | 1:1, 4:3, 3:4, 16:9, 9:16 | up to 4 | Best quality, 2K output, excellent bilingual text. |
 | **Qwen Image 2.0** | txt2img | 1:1, 4:3, 3:4, 16:9, 9:16 | up to 4 | Faster / cheaper 2K sibling of 2.0 Pro. |
 | **Qwen Image Max** | txt2img | 1:1, 4:3, 3:4, 16:9, 9:16 | up to 4 | High quality at ~1.3K native size. |
-| **Qwen Image Edit Max** | txt2img + reference images | 1:1, 4:3, 3:4, 16:9, 9:16 | up to 4 | Image editing with industrial / geometric reasoning. Accepts up to 3 reference images. |
+| **Qwen Image Edit Max** | txt2img (with reference images) | 1:1, 4:3, 3:4, 16:9, 9:16 | up to 4 | Reference-image-driven generation with industrial / geometric reasoning. Accepts up to 14 reference images. |
 | **Wan 2.6 Text-to-Image** | txt2img | 1:1, 4:3, 3:4, 16:9, 9:16 | up to 4 | Photorealistic T2I at 1K. |
 
-All models support **seed**. Negative prompts are not currently plumbed through to DashScope, so the negative prompt input is ignored for these providers.
+All models support **seed**. Negative prompts are not currently plumbed through to DashScope, so the negative prompt input is ignored for these providers. None of the Alibaba Cloud models support img2img (denoising-strength edits) or inpaint (mask-based edits) in Invoke today.
 
 ## Tips
 
 <Steps>
 1. Bilingual prompts. Qwen Image is unusually good at rendering Chinese text and mixed-language prompts — it's a strong choice when your prompt or desired output contains non-Latin script.
-2. Editing is only supported by Qwen Image Edit Max. Provide up to 3 reference images via the reference-images panel; masks and denoising strength are not supported for this provider.
+2. Reference-image input is only accepted by Qwen Image Edit Max — provide images via the reference-images panel. Masks and denoising strength are not supported for any Alibaba Cloud model.
 3. Batching is capped at 4 images per request. Larger batches are split across multiple API calls.
 4. Costs vary per model — Qwen Image 2.0 Pro is the most expensive, Qwen Image 2.0 the cheapest of the 2.0 family. Check Alibaba Cloud's pricing page before running large batches.
 </Steps>
@@ -31,15 +31,13 @@ Restart Invoke for the change to take effect.
 
 | Model | Modes | Reference Images | Notes |
 | --- | --- | --- | --- |
-| **Gemini 2.5 Flash Image** | txt2img, img2img, inpaint | Yes | 10 aspect ratios, fixed per-ratio resolutions. |
-| **Gemini 3 Pro Image Preview** | txt2img, img2img, inpaint | Up to 14 (6 object + 5 character) | 1K / 2K / 4K resolution presets. |
-| **Gemini 3.1 Flash Image Preview** | txt2img, img2img, inpaint | Up to 14 (10 object + 4 character) | 512 / 1K / 2K / 4K resolution presets. |
+| **Gemini 2.5 Flash Image** | txt2img | Yes | 10 aspect ratios, fixed per-ratio resolutions. |
+| **Gemini 3 Pro Image Preview** | txt2img | Up to 14 (6 object + 5 character) | 1K / 2K / 4K resolution presets. |
+| **Gemini 3.1 Flash Image Preview** | txt2img | Up to 14 (10 object + 4 character) | 512 / 1K / 2K / 4K resolution presets. |
 
-All Gemini models are single-image-per-request — batch size is fixed at 1. To generate multiple variations, queue multiple invocations.
-
-## Provider-Specific Options
+Reference-image input is used to condition generation but counts as txt2img — neither img2img (denoising strength) nor inpaint (mask) is supported for Gemini.
 
-Gemini exposes a **temperature** control in the parameters panel. Lower values make outputs more deterministic, higher values increase variability.
+All Gemini models are single-image-per-request — batch size is fixed at 1. To generate multiple variations, queue multiple invocations.
 
 ## Tips
 
 
@@ -13,7 +13,9 @@ External models appear in the model picker alongside locally installed models. G
 ## Supported Providers
 
 - [Google Gemini](/features/external-models/gemini/) — Gemini 2.5 Flash Image, Gemini 3 Pro Image Preview, Gemini 3.1 Flash Image Preview
-- [OpenAI](/features/external-models/openai/) — GPT Image 1 / 1.5 / 1-mini, DALL·E 3, DALL·E 2
+- [OpenAI](/features/external-models/openai/) — GPT Image 1 / 1.5 / 1-mini, DALL·E 3
+- [BytePlus Seedream](/features/external-models/seedream/) — Seedream 5.0, 5.0 Lite, 4.5, 4.0
+- [Alibaba Cloud DashScope](/features/external-models/alibabacloud/) — Qwen Image 2.0 / 2.0 Pro / Max / Edit Max, Wan 2.6 T2I
 
 ## Configuring API Keys
 
@@ -44,7 +46,7 @@ Once installed, external models show up everywhere a model can be selected. Choo
 
 Each external model declares its own **capabilities** — for example:
 
-- Which generation modes it supports (`txt2img`, `img2img`, `inpaint`).
+- Which generation modes it supports (`txt2img`, `img2img`). Inpainting is not currently supported by any external provider.
 - Whether it accepts reference images, and how many.
 - Which aspect ratios and resolutions it allows.
 - Whether it supports a negative prompt, seed, or batch size > 1.
 
@@ -4,7 +4,11 @@ title: OpenAI
 
 import { Steps } from '@astrojs/starlight/components'
 
-Invoke supports OpenAI's image generation models — both the GPT Image family and the older DALL·E models — through the OpenAI API.
+Invoke supports OpenAI's image generation models — the GPT Image family and DALL·E 3 — through the OpenAI API.
+
+:::note[DALL·E 2 removed]
+DALL·E 2 was deprecated by OpenAI and is scheduled for shutdown on 2026-05-12. It is no longer offered as a starter model in Invoke.
+:::
 
 ## Getting an API Key
 
@@ -31,11 +35,12 @@ Restart Invoke for the change to take effect.
 
 | Model | Modes | Aspect Ratios | Batch | Notes |
 | --- | --- | --- | --- | --- |
-| **GPT Image 1.5** | txt2img, img2img, inpaint | 1:1, 3:2, 2:3 | up to 10 | Fastest and cheapest GPT Image model. |
-| **GPT Image 1** | txt2img, img2img, inpaint | 1:1, 3:2, 2:3 | up to 10 | Highest quality of the GPT Image family. |
-| **GPT Image 1 Mini** | txt2img, img2img, inpaint | 1:1, 3:2, 2:3 | up to 10 | ~80% cheaper than GPT Image 1. |
+| **GPT Image 1.5** | txt2img, img2img | 1:1, 3:2, 2:3 | up to 10 | Fastest and cheapest GPT Image model. |
+| **GPT Image 1** | txt2img, img2img | 1:1, 3:2, 2:3 | up to 10 | Highest quality of the GPT Image family. |
+| **GPT Image 1 Mini** | txt2img, img2img | 1:1, 3:2, 2:3 | up to 10 | ~80% cheaper than GPT Image 1. |
 | **DALL·E 3** | txt2img only | 1:1, 7:4, 4:7 | 1 | No reference-image / edit support. |
-| **DALL·E 2** | txt2img, img2img, inpaint | 1:1 | up to 10 | Square only. |
+
+Inpainting (mask-based editing) is not currently supported for any OpenAI model in Invoke. img2img on the GPT Image family routes through the `/v1/images/edits` endpoint without a mask.
 
 ## Provider-Specific Options
 
@@ -54,7 +59,7 @@ DALL·E 2 and DALL·E 3 do not expose these options.
 ## Tips
 
 <Steps>
-1. Batching on GPT Image and DALL·E 2 tops out at 10 per request. Larger batches are split into multiple API calls.
+1. Batching on GPT Image tops out at 10 per request. Larger batches are split into multiple API calls.
 2. Costs can climb quickly with high-quality GPT Image generations. Start with GPT Image 1 Mini when iterating on prompts.
 3. Rate limits from OpenAI surface as failed invocations — retry after a short wait.
 </Steps>
@@ -9,15 +9,10 @@
 from invokeai.app.invocations.model import (
     ModelIdentifierField,
     Qwen3EncoderField,
-    T5EncoderField,
     TransformerField,
     VAEField,
 )
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.app.util.t5_model_identifier import (
-    preprocess_t5_encoder_model_identifier,
-    preprocess_t5_tokenizer_model_identifier,
-)
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType
 
 
@@ -28,15 +23,14 @@ class AnimaModelLoaderOutput(BaseInvocationOutput):
     transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
     qwen3_encoder: Qwen3EncoderField = OutputField(description=FieldDescriptions.qwen3_encoder, title="Qwen3 Encoder")
     vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
-    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder")
 
 
 @invocation(
     "anima_model_loader",
     title="Main Model - Anima",
     tags=["model", "anima"],
     category="model",
-    version="1.3.0",
+    version="1.4.0",
     classification=Classification.Prototype,
 )
 class AnimaModelLoaderInvocation(BaseInvocation):
@@ -46,7 +40,9 @@ class AnimaModelLoaderInvocation(BaseInvocation):
     - Transformer: Cosmos Predict2 DiT + LLM Adapter (from single-file checkpoint)
     - Qwen3 Encoder: Qwen3 0.6B (standalone single-file)
     - VAE: AutoencoderKLQwenImage / Wan 2.1 VAE (standalone single-file or FLUX VAE)
-    - T5 Encoder: T5-XXL model (only the tokenizer submodel is used, for LLM Adapter token IDs)
+
+    The T5-XXL tokenizer needed for LLM Adapter token IDs is bundled in the package,
+    so no T5-XXL encoder model needs to be installed.
     """
 
     model: ModelIdentifierField = InputField(
@@ -72,13 +68,6 @@ class AnimaModelLoaderInvocation(BaseInvocation):
         title="Qwen3 Encoder",
     )
 
-    t5_encoder_model: ModelIdentifierField = InputField(
-        description="T5-XXL encoder model. The tokenizer submodel is used for Anima text encoding.",
-        input=Input.Direct,
-        ui_model_type=ModelType.T5Encoder,
-        title="T5 Encoder",
-    )
-
     def invoke(self, context: InvocationContext) -> AnimaModelLoaderOutput:
         # Transformer always comes from the main model
         transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
@@ -90,13 +79,8 @@ def invoke(self, context: InvocationContext) -> AnimaModelLoaderOutput:
         qwen3_tokenizer = self.qwen3_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
         qwen3_encoder = self.qwen3_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
 
-        # T5 Encoder (only tokenizer submodel is used by Anima)
-        t5_tokenizer = preprocess_t5_tokenizer_model_identifier(self.t5_encoder_model)
-        t5_encoder = preprocess_t5_encoder_model_identifier(self.t5_encoder_model)
-
         return AnimaModelLoaderOutput(
             transformer=TransformerField(transformer=transformer, loras=[]),
             qwen3_encoder=Qwen3EncoderField(tokenizer=qwen3_tokenizer, text_encoder=qwen3_encoder),
             vae=VAEField(vae=vae),
-            t5_encoder=T5EncoderField(tokenizer=t5_tokenizer, text_encoder=t5_encoder, loras=[]),
         )
@@ -28,9 +28,10 @@
     TensorField,
     UIComponent,
 )
-from invokeai.app.invocations.model import Qwen3EncoderField, T5EncoderField
+from invokeai.app.invocations.model import Qwen3EncoderField
 from invokeai.app.invocations.primitives import AnimaConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.anima.t5_tokenizer import load_bundled_t5_tokenizer
 from invokeai.backend.patches.layer_patcher import LayerPatcher
 from invokeai.backend.patches.lora_conversions.anima_lora_constants import ANIMA_LORA_QWEN3_PREFIX
 from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
@@ -56,13 +57,13 @@
     title="Prompt - Anima",
     tags=["prompt", "conditioning", "anima"],
     category="conditioning",
-    version="1.3.0",
+    version="1.4.0",
     classification=Classification.Prototype,
 )
 class AnimaTextEncoderInvocation(BaseInvocation):
     """Encodes and preps a prompt for an Anima image.
 
-    Uses Qwen3 0.6B for hidden state extraction and T5-XXL tokenizer for
+    Uses Qwen3 0.6B for hidden state extraction and a bundled T5-XXL tokenizer for
     token IDs (no T5 model weights needed). Both are combined by the
     LLM Adapter inside the Anima transformer during denoising.
     """
@@ -73,11 +74,6 @@ class AnimaTextEncoderInvocation(BaseInvocation):
         description=FieldDescriptions.qwen3_encoder,
         input=Input.Connection,
     )
-    t5_encoder: T5EncoderField = InputField(
-        title="T5 Encoder",
-        description=FieldDescriptions.t5_encoder,
-        input=Input.Connection,
-    )
     mask: TensorField | None = InputField(
         default=None,
         description="A mask defining the region that this conditioning prompt applies to.",
@@ -193,18 +189,17 @@ def _encode_prompt(
             # Use last hidden state — only real tokens, no padding
             qwen3_embeds = outputs.hidden_states[-1][0]  # Shape: (seq_len, 1024)
 
-        # --- Step 2: Tokenize with T5-XXL tokenizer (IDs only, no model) ---
+        # --- Step 2: Tokenize with bundled T5-XXL tokenizer (IDs only, no model) ---
         context.util.signal_progress("Tokenizing with T5-XXL")
-        t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer)
-        with t5_tokenizer_info.model_on_device() as (_, t5_tokenizer):
-            t5_tokens = t5_tokenizer(
-                prompt,
-                padding=False,
-                truncation=True,
-                max_length=T5_MAX_SEQ_LEN,
-                return_tensors="pt",
-            )
-            t5xxl_ids = t5_tokens.input_ids[0]  # Shape: (seq_len,)
+        t5_tokenizer = load_bundled_t5_tokenizer()
+        t5_tokens = t5_tokenizer(
+            prompt,
+            padding=False,
+            truncation=True,
+            max_length=T5_MAX_SEQ_LEN,
+            return_tensors="pt",
+        )
+        t5xxl_ids = t5_tokens.input_ids[0]  # Shape: (seq_len,)
 
         return qwen3_embeds, t5xxl_ids, None
 
 
@@ -112,6 +112,8 @@ def __init__(
         self._stop_event = threading.Event()
         self._downloads_changed_event = threading.Event()
         self._install_completed_event = threading.Event()
+        self._restore_completed_event = threading.Event()
+        self._restore_completed_event.set()
         self._download_queue = download_queue
         self._download_cache: Dict[int, ModelInstallJob] = {}
         # Per-source locks serializing download_and_cache_model() so parallel (multi-GPU) sessions
@@ -269,16 +271,23 @@ def _restore_incomplete_installs(self) -> None:
                         self._safe_rmtree(job._install_tmpdir, self._logger)
 
     def _restore_incomplete_installs_async(self) -> None:
+        """Restore incomplete installs on a background daemon thread.
+
+        Clears ``_restore_completed_event`` before the worker thread is started
+        and sets it again when the restore attempt ends, whether it succeeded
+        or raised, so threads waiting on the event are released either way.
+        """
+        self._restore_completed_event.clear()
+
         def _run() -> None:
             try:
                 self._logger.info("Restoring incomplete installs")
                 self._restore_incomplete_installs()
                 self._logger.info("Finished restoring incomplete installs")
             except Exception as e:
+                # Failures are logged here rather than re-raised.
                 self._logger.error(f"Failed to restore incomplete installs: {e}")
+            finally:
+                # Signal completion even when the restore failed, so waiters are released.
+                self._restore_completed_event.set()
 
         threading.Thread(target=_run, daemon=True).start()
 
+    def _wait_for_restore_complete(self) -> None:
+        """Block the calling thread until ``_restore_completed_event`` is set.
+
+        The wait is performed without a timeout.
+        """
+        self._restore_completed_event.wait()
+
     def _resume_remote_download(self, job: ModelInstallJob) -> None:
         job.status = InstallStatus.WAITING
         if job.download_parts:
@@ -464,6 +473,8 @@ def heuristic_import(
         return self.import_model(source_obj, config)
 
     def import_model(self, source: ModelSource, config: Optional[ModelRecordChanges] = None) -> ModelInstallJob:  # noqa D102
+        self._wait_for_restore_complete()
+
         similar_jobs = [x for x in self.list_jobs() if x.source == source and not x.in_terminal_state]
         if similar_jobs:
             self._logger.warning(f"There is already an active install job for {source}. Not enqueuing.")
@@ -511,6 +522,8 @@ def wait_for_job(self, job: ModelInstallJob, timeout: int = 0) -> ModelInstallJo
 
     def wait_for_installs(self, timeout: int = 0) -> List[ModelInstallJob]:  # noqa D102
         """Block until all installation jobs are done."""
+        self._wait_for_restore_complete()
+
         start = time.time()
         while len(self._download_cache) > 0:
             if self._downloads_changed_event.wait(timeout=0.25):  # in case we miss an event
@@ -787,7 +800,7 @@ def _remote_files_from_source(
             except ValueError:
                 pass
 
-            return [RemoteModelFile(url=source.url, path=Path("."), size=0)], None
+            return [RemoteModelFile(url=self._normalize_huggingface_blob_url(source.url), path=Path("."), size=0)], None
 
         raise Exception(f"No files associated with {source}")
 
@@ -1513,3 +1526,15 @@ def get_fetcher_from_url(url: str) -> Type[ModelMetadataFetchBase]:
         if re.match(r"^https?://huggingface.co/[^/]+/[^/]+$", url.lower()):
             return HuggingFaceMetadataFetch
         raise ValueError(f"Unsupported model source: '{url}'")
+
+    @staticmethod
+    def _normalize_huggingface_blob_url(url: AnyHttpUrl) -> Url:
+        """Convert Hugging Face file page URLs to direct download URLs.
+
+        A URL of the form ``http(s)://huggingface.co/<seg>/<seg>/blob/<path>`` is
+        rewritten with ``/resolve/`` in place of ``/blob/``; any trailing query
+        string or fragment is kept. Matching is case-insensitive. When the
+        pattern does not match, the string form of ``url`` is passed to ``Url``
+        unchanged.
+
+        Args:
+            url: The URL to normalize.
+
+        Returns:
+            A ``Url`` built from the (possibly rewritten) string form of ``url``.
+        """
+        return Url(
+            re.sub(
+                r"^(https?://huggingface\.co/[^/]+/[^/]+)/blob/([^?#]+)([?#].*)?$",
+                r"\1/resolve/\2\3",
+                str(url),
+                flags=re.IGNORECASE,
+            )
+        )
@@ -0,0 +1,25 @@
+"""Bundled T5-XXL tokenizer for Anima.
+
+Anima tokenizes the prompt with the T5-XXL tokenizer to produce token IDs that
+index the LLM Adapter's learned embedding table. Only the tokenizer is needed —
+never the 9GB T5-XXL weights — so the tokenizer is vendored in the package as a
+self-contained fast tokenizer (tokenizer.json), avoiding both the large download
+and the sentencepiece runtime path.
+"""
+
+from functools import lru_cache
+from pathlib import Path
+
+from transformers import T5TokenizerFast
+
+# Size of the LLM Adapter's token embedding table (T5 v1.1 vocab incl. 100 sentinel
+# extra_id tokens). Token IDs must stay within this range.
+ANIMA_T5_VOCAB_SIZE = 32128
+
+_TOKENIZER_DIR = Path(__file__).parent / "tokenizer"
+
+
+@lru_cache(maxsize=1)
+def load_bundled_t5_tokenizer() -> T5TokenizerFast:
+    """Load the vendored T5-XXL fast tokenizer.
+
+    The tokenizer is read from ``_TOKENIZER_DIR``, the ``tokenizer`` directory
+    next to this module. ``lru_cache`` memoizes the result, so repeated calls
+    return the same tokenizer object instead of loading it again.
+
+    Returns:
+        The ``T5TokenizerFast`` instance loaded from ``_TOKENIZER_DIR``.
+    """
+    return T5TokenizerFast.from_pretrained(_TOKENIZER_DIR)