Commit 3a73309

Add bedrock llama4 pricing + handle llama4 templating on bedrock invoke route (#10582)
* build(model_prices_and_context_window.json): add bedrock llama4 models to model cost map
* fix template conversion for Llama 4 models in Bedrock (#10557)
* test: add testing to repro #10557
* test: add unit testing
* test(test_main.py): refactor where test is kept

Co-authored-by: aswny <[email protected]>
1 parent ae2a9cf commit 3a73309
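
For a quick end-to-end check of what this commit enables, here is a minimal usage sketch (not part of the commit): it assumes AWS credentials and region are already configured for Bedrock, and uses litellm's `bedrock/invoke/` model prefix so the request goes through the Invoke route whose Llama 4 templating is fixed here. The cost lookup at the end relies on the new pricing entries added below.

```python
import litellm

# Minimal sketch (not from this commit): call a Llama 4 instruct model over the
# Bedrock Invoke route. Assumes AWS credentials/region are set in the environment.
response = litellm.completion(
    model="bedrock/invoke/us.meta.llama4-scout-17b-instruct-v1:0",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)

print(response.choices[0].message.content)
# With the new model-cost-map entries, completion_cost should be able to price this call.
print(litellm.completion_cost(completion_response=response))
```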

6 files changed: +191 -39 lines changed

litellm/litellm_core_utils/prompt_templates/factory.py

+1-1
@@ -3633,7 +3633,7 @@ def prompt_factory(
             return mistral_instruct_pt(messages=messages)
         elif "llama2" in model and "chat" in model:
             return llama_2_chat_pt(messages=messages)
-        elif "llama3" in model and "instruct" in model:
+        elif ("llama3" in model or "llama4" in model) and "instruct" in model:
             return hf_chat_template(
                 model="meta-llama/Meta-Llama-3-8B-Instruct",
                 messages=messages,
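
In plain terms: Llama 4 instruct model IDs on Bedrock now reuse the same Hugging Face chat template as Llama 3 instruct models. A self-contained sketch of just that routing rule (the helper name and example model IDs are illustrative, not code from the repo):

```python
def uses_llama3_chat_template(model: str) -> bool:
    # Mirrors the condition added above: Llama 3 and Llama 4 instruct models
    # share the Meta-Llama-3-8B-Instruct chat template.
    model = model.lower()
    return ("llama3" in model or "llama4" in model) and "instruct" in model


assert uses_llama3_chat_template("meta.llama4-maverick-17b-instruct-v1:0")
assert uses_llama3_chat_template("meta.llama3-70b-instruct-v1:0")
assert not uses_llama3_chat_template("meta.llama2-70b-chat-v1")  # still routed to llama_2_chat_pt
```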

litellm/model_prices_and_context_window_backup.json

+60
@@ -10105,6 +10105,66 @@
         "supports_function_calling": true,
         "supports_tool_choice": false
     },
+    "meta.llama4-maverick-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00024e-3,
+        "input_cost_per_token_batches": 0.00012e-3,
+        "output_cost_per_token": 0.00097e-3,
+        "output_cost_per_token_batches": 0.000485e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
+    "us.meta.llama4-maverick-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00024e-3,
+        "input_cost_per_token_batches": 0.00012e-3,
+        "output_cost_per_token": 0.00097e-3,
+        "output_cost_per_token_batches": 0.000485e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
+    "meta.llama4-scout-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00017e-3,
+        "input_cost_per_token_batches": 0.000085e-3,
+        "output_cost_per_token": 0.00066e-3,
+        "output_cost_per_token_batches": 0.00033e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
+    "us.meta.llama4-scout-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00017e-3,
+        "input_cost_per_token_batches": 0.000085e-3,
+        "output_cost_per_token": 0.00066e-3,
+        "output_cost_per_token_batches": 0.00033e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
     "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
         "max_tokens": 77,
         "max_input_tokens": 77,

model_prices_and_context_window.json

+60
@@ -10105,6 +10105,66 @@
         "supports_function_calling": true,
         "supports_tool_choice": false
     },
+    "meta.llama4-maverick-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00024e-3,
+        "input_cost_per_token_batches": 0.00012e-3,
+        "output_cost_per_token": 0.00097e-3,
+        "output_cost_per_token_batches": 0.000485e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
+    "us.meta.llama4-maverick-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00024e-3,
+        "input_cost_per_token_batches": 0.00012e-3,
+        "output_cost_per_token": 0.00097e-3,
+        "output_cost_per_token_batches": 0.000485e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
+    "meta.llama4-scout-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00017e-3,
+        "input_cost_per_token_batches": 0.000085e-3,
+        "output_cost_per_token": 0.00066e-3,
+        "output_cost_per_token_batches": 0.00033e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
+    "us.meta.llama4-scout-17b-instruct-v1:0": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00017e-3,
+        "input_cost_per_token_batches": 0.000085e-3,
+        "output_cost_per_token": 0.00066e-3,
+        "output_cost_per_token_batches": 0.00033e-3,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": false,
+        "supported_modalities": ["text", "image"],
+        "supported_output_modalities": ["text", "code"]
+    },
     "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
         "max_tokens": 77,
         "max_input_tokens": 77,

tests/litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_factory.py

+37-29
@@ -1,8 +1,14 @@
-import pytest
 import json
 from unittest.mock import patch
+
+import pytest
+
 import litellm
-from litellm.litellm_core_utils.prompt_templates.factory import ollama_pt, BAD_MESSAGE_ERROR_STR
+from litellm.litellm_core_utils.prompt_templates.factory import (
+    BAD_MESSAGE_ERROR_STR,
+    ollama_pt,
+)
+
 
 def test_ollama_pt_simple_messages():
     """Test basic functionality with simple text messages"""
@@ -11,14 +17,15 @@ def test_ollama_pt_simple_messages():
         {"role": "assistant", "content": "How can I help you?"},
         {"role": "user", "content": "Hello"},
     ]
-
+
     result = ollama_pt(model="llama2", messages=messages)
-
+
     expected_prompt = "### System:\nYou are a helpful assistant\n\n### Assistant:\nHow can I help you?\n\n### User:\nHello\n\n"
     assert isinstance(result, dict)
    assert result["prompt"] == expected_prompt
     assert result["images"] == []
 
+
 def test_ollama_pt_consecutive_user_messages():
     """Test handling consecutive user messages"""
     messages = [
@@ -28,14 +35,15 @@ def test_ollama_pt_consecutive_user_messages():
         {"role": "assistant", "content": "I'm good, thanks!"},
         {"role": "user", "content": "I am well too."},
     ]
-
+
     result = ollama_pt(model="llama2", messages=messages)
-
+
     # Consecutive user messages should be merged
     expected_prompt = "### User:\nHello\n\n### Assistant:\nHow can I help you?\n\n### User:\nHow are you?\n\n### Assistant:\nI'm good, thanks!\n\n### User:\nI am well too.\n\n"
     assert isinstance(result, dict)
     assert result["prompt"] == expected_prompt
 
+
 # def test_ollama_pt_consecutive_system_messages():
 #     """Test handling consecutive system messages"""
 #     messages = [
@@ -44,9 +52,9 @@ def test_ollama_pt_consecutive_user_messages():
 #         {"role": "system", "content": "Be concise and polite"},
 #         {"role": "assistant", "content": "How can I help you?"}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     # Consecutive system messages should be merged
 #     expected_prompt = "### User:\nHello\n\n### System:\nYou are a helpful assistantBe concise and polite\n\n### Assistant:\nHow can I help you?\n\n"
 #     assert result == expected_prompt
@@ -59,9 +67,9 @@ def test_ollama_pt_consecutive_user_messages():
 #         {"role": "assistant", "content": "How can I help you?"},
 #         {"role": "user", "content": "Tell me a joke"}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     # Consecutive assistant messages should be merged
 #     expected_prompt = "### User:\nHello\n\n### Assistant:\nHi there!How can I help you?\n\n### User:\nTell me a joke\n\n"
 #     assert result["prompt"] == expected_prompt
@@ -75,9 +83,9 @@ def test_ollama_pt_consecutive_user_messages():
 #     ]},
 #     {"role": "assistant", "content": "That's a cat."}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     expected_prompt = "### User:\nWhat's in this image?\n\n### Assistant:\nThat's a cat.\n\n"
 #     assert result["prompt"] == expected_prompt
 #     assert result["images"] == ["http://example.com/image.jpg"]
@@ -91,9 +99,9 @@ def test_ollama_pt_consecutive_user_messages():
 #     ]},
 #     {"role": "assistant", "content": "That's a cat."}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     expected_prompt = "### User:\nWhat's in this image?\n\n### Assistant:\nThat's a cat.\n\n"
 #     assert result["prompt"] == expected_prompt
 #     assert result["images"] == ["http://example.com/image.jpg"]
@@ -116,9 +124,9 @@ def test_ollama_pt_consecutive_user_messages():
 #     },
 #     {"role": "tool", "content": "Sunny, 72°F"}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     # Check if tool call is included in the prompt
 #     assert "### User:\nWhat's the weather in San Francisco?" in result["prompt"]
 #     assert "### Assistant:\nI'll check the weather for you.Tool Calls:" in result["prompt"]
@@ -131,18 +139,18 @@ def test_ollama_pt_consecutive_user_messages():
 #     messages = [
 #         {"role": "invalid_role", "content": "This is an invalid role"}
 #     ]
-
+
 #     with pytest.raises(litellm.BadRequestError) as excinfo:
 #         ollama_pt(model="llama2", messages=messages)
-
+
 #     assert BAD_MESSAGE_ERROR_STR in str(excinfo.value)
 
 # def test_ollama_pt_empty_messages():
 #     """Test with empty messages list"""
 #     messages = []
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     assert result["prompt"] == ""
 #     assert result["images"] == []
 
@@ -155,9 +163,9 @@ def test_ollama_pt_consecutive_user_messages():
 #         {"role": "assistant", "content": "To get to the other side!"},
 #         {"role": "tool", "content": "Joke rating: 5/10"}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     assert "### User:\nTell me a joke" in result["prompt"]
 #     assert "### Assistant:\nWhy did the chicken cross the road?" in result["prompt"]
 #     assert "### User:\nWhy?" in result["prompt"]
@@ -171,9 +179,9 @@ def test_ollama_pt_consecutive_user_messages():
 #         {"role": "function", "content": "The result is 4"},
 #         {"role": "assistant", "content": "The answer is 4."}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     assert "### User:\nWhat's 2+2?The result is 4\n\n" in result["prompt"]
 #     assert "### Assistant:\nThe answer is 4.\n\n" in result["prompt"]
 
@@ -187,9 +195,9 @@ def test_ollama_pt_consecutive_user_messages():
 #     ]},
 #     {"role": "assistant", "content": "Both images show cats, but different breeds."}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     expected_prompt = "### User:\nCompare these images:\n\n### Assistant:\nBoth images show cats, but different breeds.\n\n"
 #     assert result["prompt"] == expected_prompt
 #     assert result["images"] == ["http://example.com/image1.jpg", "http://example.com/image2.jpg"]
@@ -206,12 +214,12 @@ def test_ollama_pt_consecutive_user_messages():
 #         {"role": "system", "content": "Be helpful"},
 #         {"role": "assistant", "content": "I see a cat in the image."}
 #     ]
-
+
 #     result = ollama_pt(model="llama2", messages=messages)
-
+
 #     assert "### User:\nHello\n\n" in result["prompt"]
 #     assert "### Assistant:\nHi there!\n\n" in result["prompt"]
 #     assert "### User:\nLook at this:\n\n" in result["prompt"]
 #     assert "### System:\nBe helpful\n\n" in result["prompt"]
 #     assert "### Assistant:\nI see a cat in the image.\n\n" in result["prompt"]
-#     assert result["images"] == ["http://example.com/image.jpg"]
+#     assert result["images"] == ["http://example.com/image.jpg"]
