Skip to content

Commit dad7a54

Browse files
committed
feat: picture description via langchain llm
Signed-off-by: Michele Dolfi <[email protected]>
1 parent cb9aa96 commit dad7a54

File tree

5 files changed

+453
-9
lines changed

5 files changed

+453
-9
lines changed

examples/docling_picture_description.ipynb

Lines changed: 168 additions & 0 deletions
Large diffs are not rendered by default.

langchain_docling/_plugins.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
def picture_description():
    """Return the picture-description models contributed by this package.

    The result is a dict whose ``"picture_description"`` key maps to a list
    holding the single model class provided here,
    ``PictureDescriptionLangChainModel``.
    """
    # Imported inside the function rather than at module level; presumably so
    # that loading this plugin module stays cheap until the hook is called.
    from langchain_docling.picture_description import (
        PictureDescriptionLangChainModel as model_cls,
    )

    contributed_models = [model_cls]
    return {"picture_description": contributed_models}
langchain_docling/picture_description.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import base64
2+
from collections.abc import Iterable
3+
import io
4+
from pathlib import Path
5+
from typing import ClassVar, Literal, Optional, Type, Union
6+
7+
from PIL import Image
8+
9+
from docling.datamodel.accelerator_options import AcceleratorOptions
10+
from docling.datamodel.pipeline_options import (
11+
PictureDescriptionBaseOptions,
12+
)
13+
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
14+
from docling.models.utils.hf_model_download import (
15+
HuggingFaceModelDownloadMixin,
16+
)
17+
18+
from langchain_core.language_models.chat_models import BaseChatModel
19+
20+
class PictureDescriptionLangChainOptions(PictureDescriptionBaseOptions):
    """Options for describing pictures through a LangChain chat model."""

    # Fixed tag for this options flavour; presumably what Docling uses to tell
    # picture-description option types apart -- confirm against the base class.
    kind: ClassVar[Literal["langchain"]] = "langchain"

    # Chat model that receives the prompt together with each picture.
    llm: BaseChatModel

    # Text instruction sent alongside every picture.
    prompt: str = "Describe this document picture in a few sentences."

    # Optional suffix; when set, the model reports its provenance as
    # "langchain-<provenance>" instead of plain "langchain".
    provenance: Union[str, None] = None
25+
26+
27+
28+
class PictureDescriptionLangChainModel(
    PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
):
    """Picture-description model that delegates to a LangChain chat model.

    Every picture is PNG-encoded, embedded as a base64 data URL in a user
    message next to the configured prompt, and all messages are submitted
    together through one ``llm.batch`` call.

    NOTE(review): nothing in this class body calls into
    ``HuggingFaceModelDownloadMixin`` -- confirm whether the mixin is needed.
    """

    @classmethod
    def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
        """Return the options class that configures this model."""
        return PictureDescriptionLangChainOptions

    def __init__(
        self,
        enabled: bool,
        enable_remote_services: bool,
        artifacts_path: Optional[Union[Path, str]],
        options: PictureDescriptionLangChainOptions,
        accelerator_options: AcceleratorOptions,
    ):
        """Forward all arguments to the base model and bind the chat model.

        Args:
            enabled: When false, neither ``llm`` nor the LangChain provenance
                label is set up by this constructor.
            enable_remote_services: Only forwarded to the base class here.
            artifacts_path: Only forwarded to the base class here.
            options: Supplies the chat model, the prompt and the optional
                provenance suffix.
            accelerator_options: Only forwarded to the base class here.
        """
        super().__init__(
            enabled=enabled,
            enable_remote_services=enable_remote_services,
            artifacts_path=artifacts_path,
            options=options,
            accelerator_options=accelerator_options,
        )
        # Narrow the declared type of the attribute for type checkers.
        self.options: PictureDescriptionLangChainOptions

        if not self.enabled:
            return

        self.llm = self.options.llm
        suffix = self.options.provenance
        self.provenance = f"langchain-{suffix}" if suffix else "langchain"

    def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
        """Yield the chat model's text reply for each of the given pictures.

        All pictures are consumed and encoded first, then sent in a single
        ``llm.batch`` call; replies are yielded in the order that call returns
        them. Being a generator, none of this runs until iteration starts.
        """

        def as_conversation(picture: Image.Image) -> list:
            # Serialise the picture to PNG in memory and wrap it in a data URL.
            png_buffer = io.BytesIO()
            picture.save(png_buffer, format="PNG")
            encoded = base64.b64encode(png_buffer.getvalue()).decode("utf-8")
            picture_part = {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{encoded}"},
            }
            prompt_part = {"type": "text", "text": self.options.prompt}
            # One single-message conversation per picture.
            return [{"role": "user", "content": [prompt_part, picture_part]}]

        conversations = [as_conversation(picture) for picture in images]

        for reply in self.llm.batch(conversations):
            yield reply.text()
85+

pyproject.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ dependencies = [
3636
"docling~=2.18",
3737
]
3838

39+
[project.optional-dependencies]
40+
plugin = [
41+
"langchain-openai>=0.2.12",
42+
]
43+
3944
[project.urls]
4045
homepage = "https://github.com/docling-project"
4146
repository = "https://github.com/docling-project/docling-langchain"
@@ -72,6 +77,9 @@ default-groups = "all"
7277
[tool.setuptools.packages.find]
7378
include = ["langchain_docling*"]
7479

80+
[project.entry-points."docling"]
81+
langchain_docling = "langchain_docling._plugins"
82+
7583
[tool.black]
7684
line-length = 88
7785
target-version = ["py39", "py310"]

0 commit comments

Comments
 (0)