10 changes: 2 additions & 8 deletions integrations/ollama/pyproject.toml
@@ -7,7 +7,7 @@ name = "ollama-haystack"
dynamic = ["version"]
description = 'An integration between the Ollama LLM framework and Haystack'
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = "Apache-2.0"
keywords = []
authors = [
@@ -19,15 +19,14 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 4 - Beta",
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.19.0", "ollama>=0.5.0", "pydantic"]
dependencies = ["haystack-ai>=2.22.0", "ollama>=0.5.0", "pydantic"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme"
@@ -88,7 +87,6 @@ known-first-party = ["haystack_integrations"]


[tool.ruff]
target-version = "py39"
line-length = 120

[tool.ruff.lint]
@@ -132,10 +130,6 @@ ignore = [
"PLR0913",
"PLR0915",
]
unfixable = [
# Don't touch unused imports
"F401",
]

[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = "parents"
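For context on the version bump above: the typing changes in this PR (PEP 604 unions such as `dict[str, Any] | None` in runtime-evaluated annotations) and the switch to `zip(..., strict=True)` in the embedder both require Python 3.10, which is why `requires-python` moves from ">=3.9" to ">=3.10" and the py39 ruff target is dropped. A minimal, illustrative sketch of the two features (not part of the PR):

from typing import Any

def pick_model(config: dict[str, Any] | None = None) -> str | None:
    # PEP 604 union syntax in a runtime-evaluated annotation: Python 3.10+ only
    return (config or {}).get("model")

documents = ["doc-a", "doc-b"]
embeddings = [[0.1, 0.2], [0.3, 0.4]]
# zip(strict=True) raises ValueError if the lengths differ (also 3.10+),
# instead of silently dropping trailing items
for doc, emb in zip(documents, embeddings, strict=True):
    print(doc, len(emb))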
@@ -1,5 +1,5 @@
import asyncio
from typing import Any, Optional, Union
from typing import Any

from haystack import Document, component
from tqdm import tqdm
@@ -30,13 +30,13 @@ def __init__(
self,
model: str = "nomic-embed-text",
url: str = "http://localhost:11434",
generation_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: dict[str, Any] | None = None,
timeout: int = 120,
keep_alive: Optional[Union[float, str]] = None,
keep_alive: float | str | None = None,
prefix: str = "",
suffix: str = "",
progress_bar: bool = True,
meta_fields_to_embed: Optional[list[str]] = None,
meta_fields_to_embed: list[str] | None = None,
embedding_separator: str = "\n",
batch_size: int = 32,
):
@@ -123,7 +123,7 @@ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
return texts_to_embed

def _embed_batch(
self, texts_to_embed: list[str], batch_size: int, generation_kwargs: Optional[dict[str, Any]] = None
self, texts_to_embed: list[str], batch_size: int, generation_kwargs: dict[str, Any] | None = None
) -> list[list[float]]:
"""
Internal method to embed a batch of texts.
@@ -146,7 +146,7 @@ def _embed_batch(
return all_embeddings

async def _embed_batch_async(
self, texts_to_embed: list[str], batch_size: int, generation_kwargs: Optional[dict[str, Any]] = None
self, texts_to_embed: list[str], batch_size: int, generation_kwargs: dict[str, Any] | None = None
) -> list[list[float]]:
"""
Internal method to embed a batch of texts asynchronously.
@@ -177,8 +177,8 @@ async def _embed_batch_async(

@component.output_types(documents=list[Document], meta=dict[str, Any])
def run(
self, documents: list[Document], generation_kwargs: Optional[dict[str, Any]] = None
) -> dict[str, Union[list[Document], dict[str, Any]]]:
self, documents: list[Document], generation_kwargs: dict[str, Any] | None = None
) -> dict[str, list[Document] | dict[str, Any]]:
"""
Runs an Ollama Model to compute embeddings of the provided documents.

@@ -205,15 +205,15 @@ def run(
texts_to_embed=texts_to_embed, batch_size=self.batch_size, generation_kwargs=generation_kwargs
)

for doc, emb in zip(documents, embeddings):
for doc, emb in zip(documents, embeddings, strict=True):
doc.embedding = emb

return {"documents": documents, "meta": {"model": self.model}}

@component.output_types(documents=list[Document], meta=dict[str, Any])
async def run_async(
self, documents: list[Document], generation_kwargs: Optional[dict[str, Any]] = None
) -> dict[str, Union[list[Document], dict[str, Any]]]:
self, documents: list[Document], generation_kwargs: dict[str, Any] | None = None
) -> dict[str, list[Document] | dict[str, Any]]:
"""
Asynchronously run an Ollama Model to compute embeddings of the provided documents.

@@ -241,7 +241,7 @@ async def run_async(
texts_to_embed=texts_to_embed, batch_size=self.batch_size, generation_kwargs=generation_kwargs
)

for doc, emb in zip(documents, embeddings):
for doc, emb in zip(documents, embeddings, strict=True):
doc.embedding = emb

return {"documents": documents, "meta": {"model": self.model}}
@@ -1,4 +1,4 @@
from typing import Any, Optional, Union
from typing import Any

from haystack import component

@@ -25,9 +25,9 @@ def __init__(
self,
model: str = "nomic-embed-text",
url: str = "http://localhost:11434",
generation_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: dict[str, Any] | None = None,
timeout: int = 120,
keep_alive: Optional[Union[float, str]] = None,
keep_alive: float | str | None = None,
):
"""
:param model:
@@ -60,8 +60,8 @@ def __init__(

@component.output_types(embedding=list[float], meta=dict[str, Any])
def run(
self, text: str, generation_kwargs: Optional[dict[str, Any]] = None
) -> dict[str, Union[list[float], dict[str, Any]]]:
self, text: str, generation_kwargs: dict[str, Any] | None = None
) -> dict[str, list[float] | dict[str, Any]]:
"""
Runs an Ollama Model to compute embeddings of the provided text.

@@ -87,8 +87,8 @@ def run(

@component.output_types(embedding=list[float], meta=dict[str, Any])
async def run_async(
self, text: str, generation_kwargs: Optional[dict[str, Any]] = None
) -> dict[str, Union[list[float], dict[str, Any]]]:
self, text: str, generation_kwargs: dict[str, Any] | None = None
) -> dict[str, list[float] | dict[str, Any]]:
"""
Asynchronously run an Ollama Model to compute embeddings of the provided text.

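Likewise for the text embedder touched above; a hedged sketch assuming the class is OllamaTextEmbedder from the same embedders package:

from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder

embedder = OllamaTextEmbedder(model="nomic-embed-text", url="http://localhost:11434")
result = embedder.run(text="What is the capital of France?")
# run() returns the embedding vector plus metadata about the model used
print(len(result["embedding"]), result["meta"])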
@@ -1,6 +1,6 @@
import json
from collections.abc import AsyncIterator, Iterator
from typing import Any, Callable, Literal, Optional, Union
from collections.abc import AsyncIterator, Callable, Iterator
from typing import Any, Literal

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import (
@@ -212,13 +212,13 @@ def __init__(
self,
model: str = "qwen3:0.6b",
url: str = "http://localhost:11434",
generation_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: dict[str, Any] | None = None,
timeout: int = 120,
keep_alive: Optional[Union[float, str]] = None,
streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
tools: Optional[ToolsType] = None,
response_format: Optional[Union[None, Literal["json"], JsonSchemaValue]] = None,
think: Union[bool, Literal["low", "medium", "high"]] = False,
keep_alive: float | str | None = None,
streaming_callback: Callable[[StreamingChunk], None] | None = None,
tools: ToolsType | None = None,
response_format: None | Literal["json"] | JsonSchemaValue | None = None,
think: bool | Literal["low", "medium", "high"] = False,
):
"""
:param model:
Expand Down Expand Up @@ -315,7 +315,7 @@ def from_dict(cls, data: dict[str, Any]) -> "OllamaChatGenerator":
def _handle_streaming_response(
self,
response_iter: Iterator[ChatResponse],
callback: Optional[SyncStreamingCallbackT],
callback: SyncStreamingCallbackT | None,
) -> dict[str, list[ChatMessage]]:
"""
Merge an Ollama streaming response into a single ChatMessage, preserving
@@ -399,7 +399,7 @@ def _handle_streaming_response(
async def _handle_streaming_response_async(
self,
response_iter: AsyncIterator[ChatResponse],
callback: Optional[AsyncStreamingCallbackT],
callback: AsyncStreamingCallbackT | None,
) -> dict[str, list[ChatMessage]]:
"""
Merge an Ollama async streaming response into a single ChatMessage, preserving
@@ -471,10 +471,10 @@ async def _handle_streaming_response_async(
def run(
self,
messages: list[ChatMessage],
generation_kwargs: Optional[dict[str, Any]] = None,
tools: Optional[ToolsType] = None,
generation_kwargs: dict[str, Any] | None = None,
tools: ToolsType | None = None,
*,
streaming_callback: Optional[StreamingCallbackT] = None,
streaming_callback: StreamingCallbackT | None = None,
) -> dict[str, list[ChatMessage]]:
"""
Runs an Ollama Model on a given chat history.
@@ -537,10 +537,10 @@ def run(
async def run_async(
self,
messages: list[ChatMessage],
generation_kwargs: Optional[dict[str, Any]] = None,
tools: Optional[ToolsType] = None,
generation_kwargs: dict[str, Any] | None = None,
tools: ToolsType | None = None,
*,
streaming_callback: Optional[StreamingCallbackT] = None,
streaming_callback: StreamingCallbackT | None = None,
) -> dict[str, list[ChatMessage]]:
"""
Async version of run. Runs an Ollama Model on a given chat history.
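A hedged usage sketch for the chat generator changed above, assuming OllamaChatGenerator is exported from haystack_integrations.components.generators.ollama and that the default qwen3:0.6b model has been pulled into a local Ollama server:

from haystack.dataclasses import ChatMessage
from haystack_integrations.components.generators.ollama import OllamaChatGenerator

generator = OllamaChatGenerator(model="qwen3:0.6b", url="http://localhost:11434")
result = generator.run(messages=[ChatMessage.from_user("Explain what Ollama does in one sentence.")])
# replies is a list of ChatMessage objects
print(result["replies"][0].text)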
@@ -1,4 +1,5 @@
from typing import Any, Callable, Optional, Union
from collections.abc import Callable
from typing import Any

from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import StreamingChunk
@@ -98,13 +99,13 @@ def __init__(
self,
model: str = "orca-mini",
url: str = "http://localhost:11434",
generation_kwargs: Optional[dict[str, Any]] = None,
system_prompt: Optional[str] = None,
template: Optional[str] = None,
generation_kwargs: dict[str, Any] | None = None,
system_prompt: str | None = None,
template: str | None = None,
raw: bool = False,
timeout: int = 120,
keep_alive: Optional[Union[float, str]] = None,
streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
keep_alive: float | str | None = None,
streaming_callback: Callable[[StreamingChunk], None] | None = None,
):
"""
:param model:
@@ -208,7 +209,7 @@ def _convert_to_streaming_response(self, chunks: list[StreamingChunk]) -> dict[s
return {"replies": replies, "meta": [meta]}

def _handle_streaming_response(
self, response: Any, streaming_callback: Optional[Callable[[StreamingChunk], None]]
self, response: Any, streaming_callback: Callable[[StreamingChunk], None] | None
) -> list[StreamingChunk]:
"""
Handles Streaming response cases
@@ -236,9 +237,9 @@ def _build_chunk(self, chunk_response: Any) -> StreamingChunk:
def run(
self,
prompt: str,
generation_kwargs: Optional[dict[str, Any]] = None,
generation_kwargs: dict[str, Any] | None = None,
*,
streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
streaming_callback: Callable[[StreamingChunk], None] | None = None,
) -> dict[str, list[Any]]:
"""
Runs an Ollama Model on the given prompt.
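Finally, a hedged sketch for the plain generator above, assuming OllamaGenerator from the same generators package, the default orca-mini model, and the replies/meta return shape shown in this diff:

from haystack_integrations.components.generators.ollama import OllamaGenerator

generator = OllamaGenerator(model="orca-mini", url="http://localhost:11434")
result = generator.run(prompt="Why is the sky blue?")
# replies is a list of generated strings; meta carries one dict per reply
print(result["replies"][0])
print(result["meta"][0])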