Skip to content

Commit 4d2ecbf

Browse files
authored
Merge pull request #1672 from arash77/llm_hub_update_image_models
Add handling for image models
2 parents b712fb3 + fa47b94 commit 4d2ecbf

File tree

2 files changed

+89
-75
lines changed

2 files changed

+89
-75
lines changed

tools/llm_hub/llm_hub.py

Lines changed: 39 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,22 @@
1414

1515
litellm_config_file = os.environ.get("LITELLM_CONFIG_FILE")
1616
if not litellm_config_file:
17-
print("LITELLM_CONFIG_FILE environment variable is not set.")
18-
sys.exit(1)
17+
sys.exit("LITELLM_CONFIG_FILE environment variable is not set.")
1918
with open(litellm_config_file, "r") as f:
2019
config = yaml.safe_load(f)
2120

2221
litellm_api_key = config.get("LITELLM_API_KEY")
2322
litellm_base_url = config.get("LITELLM_BASE_URL")
2423

2524
if not litellm_api_key:
26-
print(
25+
sys.exit(
2726
"LiteLLM API key is not configured! Please set LITELLM_API_KEY environment variable."
2827
)
29-
sys.exit(1)
3028

3129
if not litellm_base_url:
32-
print(
30+
sys.exit(
3331
"LiteLLM base URL is not configured! Please set LITELLM_BASE_URL environment variable."
3432
)
35-
sys.exit(1)
3633

3734
client = OpenAI(
3835
api_key=litellm_api_key,
@@ -49,8 +46,7 @@ def read_text_file(file_path):
4946
with open(file_path, "r", encoding="latin-1") as f:
5047
return f.read()
5148
except Exception:
52-
print(f"Could not read file {file_path} as text")
53-
sys.exit(1)
49+
sys.exit(f"Could not read file {file_path} as text")
5450

5551

5652
def get_image_mime_type(image_path):
@@ -59,7 +55,7 @@ def get_image_mime_type(image_path):
5955
mime_type, _ = mimetypes.guess_type(image_path)
6056
if mime_type and mime_type.startswith("image/"):
6157
return mime_type
62-
if image_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif")):
58+
if image_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".tiff", ".bmp")):
6359
ext = image_path.lower().split(".")[-1]
6460
if ext == "jpg":
6561
ext = "jpeg"
@@ -76,53 +72,46 @@ def encode_image_to_base64(image_path):
7672
mime_type = get_image_mime_type(image_path)
7773
return f"data:{mime_type};base64,{base64_image}"
7874
except Exception:
79-
print(f"Could not process image file: {image_path}")
80-
sys.exit(1)
75+
sys.exit(f"Could not process image file: {image_path}")
8176

8277

83-
content_text = question
84-
messages = []
78+
valid_model_types = {
79+
"text": {"text"},
80+
"image": {"image"},
81+
"multimodal": {"text", "image"},
82+
}
8583

86-
valid_model_types = ["text", "multimodal"]
8784
if model_type not in valid_model_types:
88-
print(
85+
sys.exit(
8986
f"Invalid model_type '{model_type}'. Must be one of: {', '.join(valid_model_types)}"
9087
)
91-
sys.exit(1)
92-
93-
if context_files:
94-
context_text_parts = []
95-
image_contents = []
96-
97-
for file_path, file_type in context_files:
98-
if file_type == "image":
99-
if model_type == "multimodal":
100-
base64_image_url = encode_image_to_base64(file_path)
101-
image_contents.append(
102-
{"type": "image_url", "image_url": {"url": base64_image_url}}
103-
)
104-
else:
105-
print(
106-
f"Image file '{file_path}' provided, but model_type is not 'multimodal'."
107-
)
108-
sys.exit(1)
109-
else:
110-
text_content = read_text_file(file_path)
111-
context_text_parts.append(
112-
f"File: {file_path}\nContent:\n{text_content}\n---\n"
113-
)
114-
115-
if context_text_parts:
116-
context_text = "Context files:\n\n" + "\n".join(context_text_parts)
117-
content_text = f"{context_text}\n\nUser Question: {question}"
118-
119-
if model_type == "multimodal" and image_contents:
120-
content = [{"type": "text", "text": content_text}, *image_contents]
121-
messages = [{"role": "user", "content": content}]
88+
89+
contents = []
90+
for file_path, file_type in context_files:
91+
if file_type not in valid_model_types[model_type]:
92+
sys.exit(f"File type '{file_type}' not allowed for model_type '{model_type}'.")
93+
if file_type == "image":
94+
contents.append(
95+
{
96+
"type": "image_url",
97+
"image_url": {"url": encode_image_to_base64(file_path)},
98+
}
99+
)
122100
else:
123-
messages = [{"role": "user", "content": content_text}]
124-
else:
125-
messages = [{"role": "user", "content": content_text}]
101+
contents.append(
102+
{
103+
"type": "text",
104+
"text": f"File: {file_path}\nContent:\n{read_text_file(file_path)}",
105+
}
106+
)
107+
108+
if question and "text" in valid_model_types[model_type]:
109+
contents.append({"type": "text", "text": question})
110+
111+
if not contents:
112+
sys.exit("No input content provided.")
113+
114+
messages = [{"role": "user", "content": contents}]
126115

127116

128117
max_retries = config.get("MAX_RETRIES", 3)
@@ -135,8 +124,7 @@ def encode_image_to_base64(image_path):
135124
break
136125
except InternalServerError as e:
137126
if attempt == max_retries - 1:
138-
print("Max retries reached. Exiting.")
139-
sys.exit(1)
127+
sys.exit("Max retries reached. Exiting.")
140128
sleep_time = min(2**attempt + random.uniform(0, 1), max_delay)
141129
print(
142130
f"InternalServerError encountered ({e}). Retrying in {sleep_time:.2f} seconds..."

tools/llm_hub/llm_hub.xml

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<description>Call any LLM</description>
33
<macros>
44
<import>macros.xml</import>
5-
<token name="@VERSION_SUFFIX@">0</token>
5+
<token name="@VERSION_SUFFIX@">1</token>
66
<token name="@PROFILE@">24.0</token>
77
</macros>
88
<requirements>
@@ -14,28 +14,37 @@
1414
#import json
1515
#import os
1616
#import re
17+
1718
#set LINK_LIST = []
18-
#for $input in $context
19-
#set file_name = os.path.splitext($input.element_identifier)[0]
20-
#set ext = $input.ext if $input.ext in ['html', 'json', 'txt', 'jpg', 'jpeg', 'png', 'gif'] else 'txt'
21-
#set LINK = re.sub('[^\w\-]', '_', $file_name)+'.'+$ext
22-
ln -s '$input' '$LINK' &&
23-
#set type = 'image' if $input.ext in ['jpg', 'jpeg', 'png', 'gif'] else 'text'
24-
$LINK_LIST.append([$LINK, $type])
19+
#for $input_type, $param in [('text', 'text_context'), ('image', 'image_context')]
20+
#set context = $getVar($param, None)
21+
#if $model_type in [$input_type, 'multimodal'] and $context
22+
#for $input in ($context if isinstance($context, list) else [$context])
23+
#set file_name = re.sub('[^\w\-]', '_', os.path.splitext($input.element_identifier)[0])
24+
#set link_name = '%s_%s.%s' % ($input.hid, $file_name, $input.ext)
25+
ln -s '$input' '$link_name' &&
26+
$LINK_LIST.append(($link_name, $input_type))
27+
#end for
28+
#end if
2529
#end for
2630
#set context_files = json.dumps($LINK_LIST)
2731
32+
#if $model_type == 'image'
33+
#set prompt = ''
34+
#end if
35+
2836
python '$__tool_directory__/llm_hub.py'
2937
'$context_files'
3038
'$prompt'
3139
'$model.fields.value'
32-
'$input_type_selector'
40+
'$model_type'
3341
]]></command>
3442
<inputs>
35-
<conditional name="input_type">
36-
<param name="input_type_selector" type="select" label="Choose the model" help="Multimodal models are capable to have image and text as input.">
43+
<conditional name="model_condition">
44+
<param name="model_type" type="select" label="Choose the model" help="Multimodal models are capable of taking both image and text as input.">
3745
<option value="multimodal" selected="true">Multimodal models</option>
3846
<option value="text">Text models</option>
47+
<option value="image">Image models</option>
3948
</param>
4049
<when value="multimodal">
4150
<param name="model" type="select" optional="false" label="Model" help="Select the model you want to use.">
@@ -44,7 +53,11 @@ python '$__tool_directory__/llm_hub.py'
4453
</options>
4554
<validator message="No model annotation is available for LLM Hub" type="no_options"/>
4655
</param>
47-
<param name="context" type="data" multiple="true" optional="true" format="html,json,txt,jpg,png,gif" label="Context" max="500"/>
56+
<param name="text_context" type="data" multiple="true" optional="true" format="html,json,txt" label="Text Context"/>
57+
<param name="image_context" type="data" optional="true" format="jpg,png,gif,tiff,bmp" label="Image Context"/>
58+
<param name="prompt" type="text" optional="false" label="Prompt" help="Prompts or tasks you want the LLM to perform." area="true">
59+
<validator type="empty_field"/>
60+
</param>
4861
</when>
4962
<when value="text">
5063
<param name="model" type="select" optional="false" label="Model" help="Select the model you want to use.">
@@ -53,24 +66,33 @@ python '$__tool_directory__/llm_hub.py'
5366
</options>
5467
<validator message="No model annotation is available for LLM Hub" type="no_options"/>
5568
</param>
56-
<param name="context" type="data" multiple="true" optional="true" format="html,json,txt" label="Context" max="500"/>
69+
<param name="text_context" type="data" multiple="true" optional="true" format="html,json,txt" label="Text Context"/>
70+
<param name="prompt" type="text" optional="false" label="Prompt" help="Prompts or tasks you want the LLM to perform." area="true">
71+
<validator type="empty_field"/>
72+
</param>
73+
</when>
74+
<when value="image">
75+
<param name="model" type="select" optional="false" label="Model" help="Select the model you want to use.">
76+
<options from_data_table="llm_models">
77+
<filter type="static_value" column="2" value="image"/>
78+
</options>
79+
<validator message="No model annotation is available for LLM Hub" type="no_options"/>
80+
</param>
81+
<param name="image_context" type="data" optional="false" format="jpg,png,gif,tiff,bmp" label="Image Context"/>
5782
</when>
5883
</conditional>
59-
<param name="prompt" type="text" optional="false" label="Prompt" help="Prompts or tasks you want the LLM to perform." area="true">
60-
<validator type="empty_field"/>
61-
</param>
6284
</inputs>
6385
<outputs>
64-
<data name="output" format="markdown" label="${tool.name} on ${on_string}" from_work_dir="./output.md"/>
86+
<data name="output" format="markdown" label="${tool.name}(${model}) #if $on_string then ' on ' + $on_string else ''#" from_work_dir="./output.md"/>
6587
</outputs>
6688
<tests>
6789
<test expect_failure="true" expect_exit_code="1">
68-
<conditional name="input_type">
69-
<param name="input_type_selector" value="text"/>
90+
<conditional name="model_condition">
91+
<param name="model_type" value="text"/>
7092
<param name="model" value="unknown"/>
71-
<param name="context" value="test.txt" ftype="txt"/>
93+
<param name="text_context" value="test.txt" ftype="txt"/>
94+
<param name="prompt" value="What is this?"/>
7295
</conditional>
73-
<param name="prompt" value="What is this?"/>
7496
<assert_stdout>
7597
<has_text text="LiteLLM API key is not configured!"/>
7698
</assert_stdout>
@@ -98,9 +120,13 @@ Usage
98120
1. **Select a Model**: Choose the LLM model that best fits your needs.
99121
Available models depend on what's configured in the LiteLLM proxy by your Galaxy administrators.
100122
101-
2. **Upload Context Data**: You can upload files in formats such as TXT, HTML, JSON, JPG, PNG, or GIF.
102-
This context data serves as additional input for the prompt you wish to execute.
103-
Vision-capable models can process image files.
123+
2. **Upload Context Data**: You can upload context data in different ways depending on the model type:
124+
125+
- **Text models**: Upload multiple text files (TXT, HTML, JSON) as context
126+
- **Image models**: Upload a single image file (JPG, PNG, GIF, TIFF, BMP) as context
127+
- **Multimodal models**: Upload multiple text files and/or a single image file as context
128+
129+
Vision-capable (multimodal and image) models can process image files, but only one image file is supported per request.
104130
105131
3. **Provide a Prompt**: Specify the task or question you want the LLM to address.
106132
The more specific the prompt, the more tailored the response will be.

0 commit comments

Comments
 (0)