add docstrings

dolfim-ibm · dolfim-ibm · commit a55d08cab417 · 2025-09-09T13:11:44.000+02:00
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
diff --git a/examples/docling_picture_description.ipynb b/examples/docling_picture_description.ipynb
@@ -7,13 +7,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from langchain_openai import ChatOpenAI\n",
-    "\n",
-    "from langchain_docling.picture_description import PictureDescriptionLangChainOptions\n",
-    "\n",
     "from docling.datamodel.base_models import InputFormat\n",
     "from docling.datamodel.pipeline_options import PdfPipelineOptions\n",
-    "from docling.document_converter import DocumentConverter, PdfFormatOption"
+    "from docling.document_converter import DocumentConverter, PdfFormatOption\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "from langchain_docling.picture_description import PictureDescriptionLangChainOptions"
    ]
   },
   {
diff --git a/langchain_docling/_plugins.py b/langchain_docling/_plugins.py
@@ -1,4 +1,8 @@
+"""Register Docling plugins."""
+
+
 def picture_description():
+    """Picture description plugins."""
     from langchain_docling.picture_description import PictureDescriptionLangChainModel
 
     return {
diff --git a/langchain_docling/picture_description.py b/langchain_docling/picture_description.py
@@ -1,35 +1,36 @@
+"""Picture description model using LangChain primitives."""
+
 import base64
-from collections.abc import Iterable
 import io
+from collections.abc import Iterable
 from pathlib import Path
 from typing import ClassVar, Literal, Optional, Type, Union
 
-from PIL import Image
-
 from docling.datamodel.accelerator_options import AcceleratorOptions
-from docling.datamodel.pipeline_options import (
-    PictureDescriptionBaseOptions,
-)
+from docling.datamodel.pipeline_options import PictureDescriptionBaseOptions
 from docling.models.picture_description_base_model import PictureDescriptionBaseModel
-from docling.models.utils.hf_model_download import (
-    HuggingFaceModelDownloadMixin,
-)
-
+from docling.models.utils.hf_model_download import HuggingFaceModelDownloadMixin
 from langchain_core.language_models.chat_models import BaseChatModel
+from PIL import Image
+
 
 class PictureDescriptionLangChainOptions(PictureDescriptionBaseOptions):
+    """Options for the PictureDescriptionLangChainModel."""
+
     kind: ClassVar[Literal["langchain"]] = "langchain"
     llm: BaseChatModel
     prompt: str = "Describe this document picture in a few sentences."
     provenance: Optional[str] = None
 
 
-
 class PictureDescriptionLangChainModel(
     PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
 ):
+    """Implementation of a PictureDescription model using LangChain."""
+
     @classmethod
     def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
+        """Define the option type for the factory."""
         return PictureDescriptionLangChainOptions
 
     def __init__(
@@ -40,6 +41,7 @@ def __init__(
         options: PictureDescriptionLangChainOptions,
         accelerator_options: AcceleratorOptions,
     ):
+        """Initialize PictureDescriptionLangChainModel."""
         super().__init__(
             enabled=enabled,
             enable_remote_services=enable_remote_services,
@@ -56,7 +58,7 @@ def __init__(
                 self.provenance += f"-{self.options.provenance}"
 
     def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
-
+        """Annotate the images with the LangChain model."""
         # Create input messages
         batch_messages = []
 
@@ -66,20 +68,23 @@ def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
             buffered = io.BytesIO()
             image.save(buffered, format="PNG")
             image_data = base64.b64encode(buffered.getvalue()).decode("utf-8")
-            batch_messages.append([
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": self.options.prompt},
+            batch_messages.append(
+                [
                     {
-                        "type": "image_url",
-                        "image_url": {"url": f"data:image/png;base64,{image_data}"},
-                    },
-                ],
-            }]
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": self.options.prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/png;base64,{image_data}"
+                                },
+                            },
+                        ],
+                    }
+                ]
             )
 
         responses = self.llm.batch(batch_messages)
         for resp in responses:
             yield resp.text()
-
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,11 +36,6 @@ dependencies = [
   "docling~=2.18",
 ]
 
-[project.optional-dependencies]
-plugin = [
-    "langchain-openai>=0.2.12",
-]
-
 [project.urls]
 homepage = "https://github.com/docling-project"
 repository = "https://github.com/docling-project/docling-langchain"
@@ -68,6 +63,7 @@ dev = [
     "pytest~=8.3",
     "pytest-cov>=6.1.1",
     "python-semantic-release~=7.32",
+    "langchain-openai>=0.2.12",
 ]
 
 [tool.uv]
diff --git a/uv.lock b/uv.lock