11import base64
22import re
3- from enum import StrEnum
43from typing import Any
54
65import httpx
6+ from langchain_core .language_models import BaseChatModel
77from uipath ._utils ._ssl_context import get_httpx_client_kwargs
88
9+ from uipath_langchain .chat .types import APIFlavor , LLMProvider
10+ from uipath_langchain .llm import get_api_flavor , get_llm_provider
11+
912IMAGE_MIME_TYPES : set [str ] = {
1013 "image/png" ,
1114 "image/jpeg" ,
1417}
1518
1619
17- class LlmProvider (StrEnum ):
18- OPENAI = "openai"
19- BEDROCK = "bedrock"
20- VERTEX = "vertex"
21- UNKNOWN = "unknown"
22-
23-
def is_pdf(mime_type: str) -> bool:
    """Return True when ``mime_type`` names a PDF document.

    The comparison is case-insensitive: the value is lower-cased before
    being matched against ``application/pdf``.
    """
    normalized = mime_type.lower()
    return normalized == "application/pdf"
@@ -31,25 +27,6 @@ def is_image(mime_type: str) -> bool:
3127 return mime_type .lower () in IMAGE_MIME_TYPES
3228
3329
34- def detect_provider (model_name : str ) -> LlmProvider :
35- """Detect the LLM provider (Bedrock, OpenAI, or Vertex) based on the model name."""
36- if not model_name :
37- raise ValueError (f"Unsupported model: { model_name } " )
38-
39- model_lower = model_name .lower ()
40-
41- if "anthropic" in model_lower or "claude" in model_lower :
42- return LlmProvider .BEDROCK
43-
44- if "gpt" in model_lower :
45- return LlmProvider .OPENAI
46-
47- if "gemini" in model_lower :
48- return LlmProvider .VERTEX
49-
50- raise ValueError (f"Unsupported model: { model_name } " )
51-
52-
5330def sanitize_filename_for_anthropic (filename : str ) -> str :
5431 """Sanitize a filename to conform to Anthropic's document naming requirements."""
5532 if not filename or filename .isspace ():
@@ -85,24 +62,30 @@ async def build_message_content_part_from_data(
8562 url : str ,
8663 filename : str ,
8764 mime_type : str ,
88- model : str ,
65+ model : BaseChatModel ,
8966) -> dict [str , Any ]:
9067 """Download a file and build a provider-specific message content part.
9168
9269 The format varies based on the detected provider (Bedrock, OpenAI, or Vertex).
70+ Uses model.llm_provider and model.api_flavor attributes.
9371 """
94- provider = detect_provider (model )
72+ provider = get_llm_provider (model )
73+ api_flavor = get_api_flavor (model )
9574
96- if provider == LlmProvider .BEDROCK :
97- return await _build_bedrock_content_part_from_data (url , mime_type , filename )
75+ if provider == LLMProvider .BEDROCK :
76+ return await _build_bedrock_content_part_from_data (
77+ url , mime_type , filename , api_flavor
78+ )
9879
99- if provider == LlmProvider .OPENAI :
80+ if provider == LLMProvider .OPENAI :
10081 return await _build_openai_content_part_from_data (
101- url , mime_type , filename , False
82+ url , mime_type , filename , True , api_flavor
10283 )
10384
104- if provider == LlmProvider .VERTEX :
105- return await _build_vertex_content_part_from_data (url , mime_type , False )
85+ if provider == LLMProvider .VERTEX :
86+ return await _build_vertex_content_part_from_data (
87+ url , mime_type , True , api_flavor
88+ )
10689
10790 raise ValueError (f"Unsupported provider: { provider } " )
10891
@@ -111,8 +94,26 @@ async def _build_bedrock_content_part_from_data(
11194 url : str ,
11295 mime_type : str ,
11396 filename : str ,
97+ api_flavor : APIFlavor ,
98+ ) -> dict [str , Any ]:
99+ """Build a content part for AWS Bedrock (Anthropic Claude models).
100+
101+ Converse API uses raw bytes, Invoke API uses base64-encoded content.
102+ """
103+ if api_flavor == APIFlavor .AWS_BEDROCK_CONVERSE :
104+ return await _build_bedrock_converse_content_part (url , mime_type , filename )
105+ elif api_flavor == APIFlavor .AWS_BEDROCK_INVOKE :
106+ return await _build_bedrock_invoke_content_part (url , mime_type , filename )
107+ else :
108+ raise ValueError (f"Unsupported Bedrock api_flavor: { api_flavor } " )
109+
110+
111+ async def _build_bedrock_converse_content_part (
112+ url : str ,
113+ mime_type : str ,
114+ filename : str ,
114115) -> dict [str , Any ]:
115- """Build a content part for AWS Bedrock (Anthropic Claude models )."""
116+ """Build content part for Bedrock Converse API (PDFs use raw bytes, images use base64 )."""
116117 if is_pdf (mime_type ):
117118 file_bytes = await _download_file_bytes (url )
118119 name = filename .rsplit ("." , 1 )[0 ] if "." in filename else filename
@@ -143,39 +144,109 @@ async def _build_bedrock_content_part_from_data(
143144 raise ValueError (f"Unsupported mime_type: { mime_type } " )
144145
145146
async def _build_bedrock_invoke_content_part(
    url: str,
    mime_type: str,
    filename: str,
) -> dict[str, Any]:
    """Build a content part for the Bedrock Invoke API.

    PDFs become ``document`` parts and images become ``image`` parts. Both
    carry the same ``source`` layout, whose ``data`` field holds the value
    returned by ``_download_file`` (presumably base64 text - confirm against
    that helper).

    Args:
        url: Location of the file to download.
        mime_type: MIME type of the file; forwarded unchanged as ``media_type``.
        filename: Accepted for signature parity with the other builders; this
            payload does not use it.

    Returns:
        A dict with ``type`` and ``source`` keys.

    Raises:
        ValueError: If ``mime_type`` is neither a PDF nor a supported image.
    """
    if is_pdf(mime_type):
        part_type = "document"
    elif is_image(mime_type):
        part_type = "image"
    else:
        # Reject unsupported types before spending a network round trip.
        raise ValueError(f"Unsupported mime_type: {mime_type}")

    base64_content = await _download_file(url)

    return {
        "type": part_type,
        "source": {
            "type": "base64",
            "media_type": mime_type,
            "data": base64_content,
        },
    }
176+
177+
146178async def _build_openai_content_part_from_data (
147179 url : str ,
148180 mime_type : str ,
149181 filename : str ,
150- download_image : bool ,
182+ download_file : bool ,
183+ api_flavor : APIFlavor ,
151184) -> dict [str , Any ]:
152- """Build a content part for OpenAI models (base64-encoded or URL reference)."""
153- if download_image :
154- base64_content = await _download_file (url )
155- if is_image (mime_type ):
156- data_url = f"data:{ mime_type } ;base64,{ base64_content } "
157- return {
158- "type" : "input_image" ,
159- "image_url" : data_url ,
160- }
185+ """Build a content part for OpenAI models"""
186+ if api_flavor == APIFlavor .OPENAI_RESPONSES :
187+ return await _build_openai_responses_content_part (
188+ url , mime_type , filename , download_file
189+ )
190+ elif api_flavor == APIFlavor .OPENAI_COMPLETIONS :
191+ return await _build_openai_completions_content_part (
192+ url , mime_type , filename , download_file
193+ )
194+ else :
195+ raise ValueError (f"Unsupported OpenAI api_flavor: { api_flavor } " )
161196
162- if is_pdf (mime_type ):
163- data = f"data:application/pdf;base64,{ base64_content } "
164- return {
165- "type" : "file" ,
166- "file" : {
167- "filename" : filename ,
168- "file_data" : data ,
169- },
170- }
171197
172- elif is_image (mime_type ):
async def _build_openai_responses_content_part(
    url: str,
    mime_type: str,
    filename: str,
    download_file: bool,
) -> dict[str, Any]:
    """Dispatch to the matching OpenAI Responses API content-part builder.

    When ``download_file`` is true the file is fetched and embedded by
    ``_build_openai_responses_downloaded``; otherwise a URL-reference part is
    produced by ``_build_openai_responses_from_url`` (which ignores
    ``filename``).
    """
    if not download_file:
        return _build_openai_responses_from_url(url, mime_type)

    embedded_part = await _build_openai_responses_downloaded(
        url, mime_type, filename
    )
    return embedded_part
208+
209+
async def _build_openai_responses_downloaded(
    url: str,
    mime_type: str,
    filename: str,
) -> dict[str, Any]:
    """Build an OpenAI Responses API content part with the file embedded.

    Images are returned as an ``input_image`` part whose ``image_url`` is a
    ``data:`` URL; PDFs are returned as a ``file`` part carrying ``filename``
    and a ``data:application/pdf`` URL.

    Args:
        url: Location of the file to download.
        mime_type: MIME type of the file.
        filename: Name reported for PDF attachments.

    Returns:
        The content-part dict for the given MIME type.

    Raises:
        ValueError: If ``mime_type`` is neither a supported image nor a PDF.
    """
    image = is_image(mime_type)
    if not image and not is_pdf(mime_type):
        # Validate first so unsupported types never trigger a download.
        raise ValueError(f"Unsupported mime_type: {mime_type}")

    base64_content = await _download_file(url)

    if image:
        return {
            "type": "input_image",
            "image_url": f"data:{mime_type};base64,{base64_content}",
        }

    # NOTE(review): this is the nested ``file`` block shape rather than a flat
    # ``input_file`` part - presumably translated downstream; confirm.
    return {
        "type": "file",
        "file": {
            "filename": filename,
            "file_data": f"data:application/pdf;base64,{base64_content}",
        },
    }
236+
237+
238+ def _build_openai_responses_from_url (
239+ url : str ,
240+ mime_type : str ,
241+ ) -> dict [str , Any ]:
242+ """Build content part for OpenAI Responses API with URL reference."""
243+ if is_image (mime_type ):
173244 return {
174245 "type" : "input_image" ,
175246 "image_url" : url ,
176247 }
177248
178- elif is_pdf (mime_type ):
249+ if is_pdf (mime_type ):
179250 return {
180251 "type" : "input_file" ,
181252 "file_url" : url ,
@@ -184,12 +255,70 @@ async def _build_openai_content_part_from_data(
184255 raise ValueError (f"Unsupported mime_type: { mime_type } " )
185256
186257
async def _build_openai_completions_content_part(
    url: str,
    mime_type: str,
    filename: str,
    download_file: bool,
) -> dict[str, Any]:
    """Dispatch to the matching OpenAI Completions API content-part builder.

    ``download_file`` selects between embedding the downloaded file and
    referencing it by URL; both helpers receive the same three arguments.
    """
    if not download_file:
        return await _build_openai_completions_from_url(url, mime_type, filename)

    embedded_part = await _build_openai_completions_downloaded(
        url, mime_type, filename
    )
    return embedded_part
268+
269+
async def _build_openai_completions_downloaded(
    url: str,
    mime_type: str,
    filename: str,
) -> dict[str, Any]:
    """Build an OpenAI Completions API content part with the file embedded.

    Only images are supported: the image is downloaded and returned as an
    ``image_url`` part whose ``url`` is a ``data:`` URL.

    Args:
        url: Location of the file to download.
        mime_type: MIME type of the file.
        filename: Accepted for signature parity with the other builders; this
            payload does not use it.

    Returns:
        An ``image_url`` content-part dict.

    Raises:
        ValueError: If ``mime_type`` is a PDF (rejected for this API) or any
            other non-image type.
    """
    # Every non-image input ends in an error, so decide that before
    # downloading anything.
    if not is_image(mime_type):
        if is_pdf(mime_type):
            raise ValueError(
                "PDFs are not supported when using the OpenAi completions API"
            )
        raise ValueError(f"Unsupported mime_type: {mime_type}")

    base64_content = await _download_file(url)
    data_url = f"data:{mime_type};base64,{base64_content}"
    return {
        "type": "image_url",
        "image_url": {"url": data_url},
    }
289+
290+
async def _build_openai_completions_from_url(
    url: str,
    mime_type: str,
    filename: str,
) -> dict[str, Any]:
    """Build an OpenAI Completions API content part that references a URL.

    Images yield an ``image_url`` part pointing at ``url``. PDFs and all
    other MIME types raise ``ValueError``. ``filename`` is not used.
    """
    if not is_image(mime_type):
        if is_pdf(mime_type):
            raise ValueError(
                "PDFs are not supported when using the OpenAi completions API"
            )
        raise ValueError(f"Unsupported mime_type: {mime_type}")

    image_ref = {"url": url}
    return {
        "type": "image_url",
        "image_url": image_ref,
    }
307+
308+
187309async def _build_vertex_content_part_from_data (
188310 url : str ,
189311 mime_type : str ,
190312 download_file : bool ,
313+ api_flavor : APIFlavor ,
191314) -> dict [str , Any ]:
192315 """Build a content part for Google Vertex AI / Gemini models."""
316+
317+ if api_flavor != APIFlavor .VERTEX_GEMINI_GENERATE_CONTENT :
318+ raise ValueError (
319+ f"Unsupported api_flavor={ api_flavor } for building file content parts"
320+ )
321+
193322 if download_file :
194323 base64_content = await _download_file (url )
195324 if is_image (mime_type ) or is_pdf (mime_type ):
0 commit comments