1+ """Picture description model using LangChain primitives."""
2+
13import base64
2- from collections .abc import Iterable
34import io
5+ from collections .abc import Iterable
46from pathlib import Path
57from typing import ClassVar , Literal , Optional , Type , Union
68
7- from PIL import Image
8-
99from docling .datamodel .accelerator_options import AcceleratorOptions
10- from docling .datamodel .pipeline_options import (
11- PictureDescriptionBaseOptions ,
12- )
10+ from docling .datamodel .pipeline_options import PictureDescriptionBaseOptions
1311from docling .models .picture_description_base_model import PictureDescriptionBaseModel
14- from docling .models .utils .hf_model_download import (
15- HuggingFaceModelDownloadMixin ,
16- )
17-
12+ from docling .models .utils .hf_model_download import HuggingFaceModelDownloadMixin
1813from langchain_core .language_models .chat_models import BaseChatModel
14+ from PIL import Image
15+
1916
class PictureDescriptionLangChainOptions(PictureDescriptionBaseOptions):
    """Settings for describing pictures through a LangChain chat model."""

    # Class-level tag pinned to the literal "langchain".
    kind: ClassVar[Literal["langchain"]] = "langchain"
    # Chat model instance; required, since no default is provided.
    llm: BaseChatModel
    # Text instruction sent in the same user message as each picture.
    prompt: str = "Describe this document picture in a few sentences."
    # Optional label; the model's __init__ appends it to its own provenance
    # string after a "-" separator.
    provenance: Optional[str] = None
2524
2625
27-
2826class PictureDescriptionLangChainModel (
2927 PictureDescriptionBaseModel , HuggingFaceModelDownloadMixin
3028):
29+ """Implementation of a PictureDescription model using LangChain."""
30+
3131 @classmethod
3232 def get_options_type (cls ) -> Type [PictureDescriptionBaseOptions ]:
33+ """Define the option type for the factory."""
3334 return PictureDescriptionLangChainOptions
3435
3536 def __init__ (
@@ -40,6 +41,7 @@ def __init__(
4041 options : PictureDescriptionLangChainOptions ,
4142 accelerator_options : AcceleratorOptions ,
4243 ):
44+ """Initialize PictureDescriptionLangChainModel."""
4345 super ().__init__ (
4446 enabled = enabled ,
4547 enable_remote_services = enable_remote_services ,
@@ -56,7 +58,7 @@ def __init__(
5658 self .provenance += f"-{ self .options .provenance } "
5759
5860 def _annotate_images (self , images : Iterable [Image .Image ]) -> Iterable [str ]:
59-
61+ """Annotate the images with the LangChain model."""
6062 # Create input messages
6163 batch_messages = []
6264
@@ -66,20 +68,23 @@ def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
6668 buffered = io .BytesIO ()
6769 image .save (buffered , format = "PNG" )
6870 image_data = base64 .b64encode (buffered .getvalue ()).decode ("utf-8" )
69- batch_messages .append ([
70- {
71- "role" : "user" ,
72- "content" : [
73- {"type" : "text" , "text" : self .options .prompt },
71+ batch_messages .append (
72+ [
7473 {
75- "type" : "image_url" ,
76- "image_url" : {"url" : f"data:image/png;base64,{ image_data } " },
77- },
78- ],
79- }]
74+ "role" : "user" ,
75+ "content" : [
76+ {"type" : "text" , "text" : self .options .prompt },
77+ {
78+ "type" : "image_url" ,
79+ "image_url" : {
80+ "url" : f"data:image/png;base64,{ image_data } "
81+ },
82+ },
83+ ],
84+ }
85+ ]
8086 )
8187
8288 responses = self .llm .batch (batch_messages )
8389 for resp in responses :
8490 yield resp .text ()
85-
0 commit comments