feat(aws-bedrock): update model YAMLs [bot] #1603
Conversation
|
/test-models |
Gateway test results
Failures (8)
ErrorCode snippetfrom openai import OpenAI
client = OpenAI(api_key="***", base_url="https://internal.devtest.truefoundry.tech/api/llm")
response = client.chat.completions.create(
model="test-v2-aws-bedrock/us.mistral.pixtral-large-2502-v1-0",
messages=[
{"role": "system", "content": "You are a helpful assistant. Respond in JSON format."},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hi, how can I help you"},
{"role": "user", "content": "List 3 colors with their hex codes in JSON."},
],
response_format={"type": "json_object"},
stream=True,
)
import json as _json
_accumulated = ""
for chunk in response:
if chunk.choices and len(chunk.choices) > 0:
delta = chunk.choices[0].delta
if delta.content is not None:
_accumulated += delta.content
print(delta.content, end="", flush=True)
if not _accumulated:
raise Exception("VALIDATION FAILED: json-output stream - no content received")
_json.loads(_accumulated)
print("\nVALIDATION: json-output stream SUCCESS")OutputTraceback (most recent call last): OutputTraceback (most recent call last): OutputTraceback (most recent call last): OutputTraceback (most recent call last): Output
ErrorCode snippetimport boto3
from botocore.config import Config
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_model = "test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0"
client = boto3.client(
"bedrock-runtime",
region_name="us-east-1",
endpoint_url=_endpoint,
aws_access_key_id="dummy",
aws_secret_access_key="dummy",
config=Config(inject_host_prefix=False),
)
def _add_auth_header(request, **kwargs):
request.headers["x-tfy-api-key"] = _api_key
client.meta.events.register("before-sign.bedrock-runtime.*", _add_auth_header)
messages = [
{"role": "user", "content": [{"text": "Hi"}]},
{"role": "assistant", "content": [{"text": "Hi, how can I help you"}]},
{"role": "user", "content": [{"text": "How to calculate 3^3^3^3? Think step by step and show all reasoning."}]},
]
system = [{"text": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."}]
response = client.converse_stream(
modelId=_model,
system=system,
messages=messages,
)
_events = []
for _event in response["stream"]:
_events.append(_event)
if "contentBlockDelta" in _event:
_delta = _event["contentBlockDelta"].get("delta", {})
if "reasoningContent" in _delta:
print(_delta["reasoningContent"].get("text", ""), end="", flush=True)
if "text" in _delta:
print(_delta["text"], end="", flush=True)
_reasoning_detected = False
for _event in _events:
if "contentBlockDelta" in _event:
_delta = _event["contentBlockDelta"].get("delta", {})
if "reasoningContent" in _delta:
_reasoning_detected = True
if "contentBlockStart" in _event:
_start = _event["contentBlockStart"].get("start", {})
if "reasoningContent" in _start:
_reasoning_detected = True
if "metadata" in _event:
_usage = _event["metadata"].get("usage", {})
if _usage.get("reasoning_tokens") or _usage.get("reasoningTokens"):
_reasoning_detected = True
if not _reasoning_detected:
raise Exception("VALIDATION FAILED: reasoning stream - no reasoning information in Bedrock stream")
print("\nVALIDATION: reasoning stream SUCCESS")Output
ErrorCode snippetfrom openai import OpenAI
client = OpenAI(api_key="***", base_url="https://internal.devtest.truefoundry.tech/api/llm")
response = client.chat.completions.create(
model="test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0",
messages=[
{"role": "system", "content": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hi, how can I help you"},
{"role": "user", "content": "How to calculate 3^3^3^3? Think step by step and show all reasoning."},
],
reasoning_effort="medium",
stream=True,
)
_reasoning_detected = False
for chunk in response:
if chunk.choices and len(chunk.choices) > 0:
delta = chunk.choices[0].delta
if delta.content is not None:
print(delta.content, end="", flush=True)
if getattr(delta, "reasoning_content", None) is not None:
_reasoning_detected = True
if getattr(delta, "reasoning", None) is not None:
_reasoning_detected = True
_usage = getattr(chunk, "usage", None)
if _usage is not None:
_details = getattr(_usage, "completion_tokens_details", None)
if _details and getattr(_details, "reasoning_tokens", 0) > 0:
_reasoning_detected = True
if not _reasoning_detected:
raise Exception("VALIDATION FAILED: reasoning stream - no reasoning information in stream")
print("\nVALIDATION: reasoning stream SUCCESS")Output
ErrorCode snippetfrom openai import OpenAI
client = OpenAI(api_key="***", base_url="https://internal.devtest.truefoundry.tech/api/llm")
response = client.chat.completions.create(
model="test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0",
messages=[
{"role": "system", "content": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hi, how can I help you"},
{"role": "user", "content": "How to calculate 3^3^3^3? Think step by step and show all reasoning."},
],
reasoning_effort="medium",
stream=False,
)
_usage = getattr(response, "usage", None)
_reasoning_detected = False
_choices = getattr(response, "choices", None)
if _choices and len(_choices) > 0:
_message = getattr(_choices[0], "message", None)
else:
_message = None
if _message and getattr(_message, "content", None) is not None:
print(_message.content)
if _usage is not None:
_output_token_details = getattr(_usage, "completion_tokens_details", None)
if _output_token_details and getattr(_output_token_details, "reasoning_tokens", 0) > 0:
_reasoning_detected = True
elif getattr(_usage, "reasoning", None) is not None:
_reasoning_detected = True
if getattr(_message, "reasoning_content", None) is not None:
_reasoning_detected = True
elif getattr(_message, "reasoning", None) is not None:
_reasoning_detected = True
if not _reasoning_detected:
print("Response: ", response)
raise Exception("VALIDATION FAILED: reasoning - no reasoning information in response")
print("VALIDATION: reasoning SUCCESS")OutputSuccesses (193)
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Skipped (19)
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason |
|
/test-models |
There was a problem hiding this comment.
Cursor Bugbot has reviewed your changes and found 3 potential issues.
❌ Bugbot Autofix is OFF. To automatically fix reported issues with cloud agents, enable autofix in the Cursor dashboard.
Reviewed by Cursor Bugbot for commit 1018200. Configure here.
| - input_cost_per_token: 4.5e-7 | ||
| output_cost_per_token: 0.0000018 | ||
| region: eu-north-1 | ||
| - region: ap-southeast-4 # not found in official docs |
There was a problem hiding this comment.
Region costs missing token rates
Medium Severity
New costs rows for us-east-1, sa-east-1, and ap-southeast-4 list only region (with notes that pricing was not in docs) and omit input_cost_per_token and output_cost_per_token. Region-based cost lookup can treat usage in those regions as free or fail to price requests correctly.
Reviewed by Cursor Bugbot for commit 1018200. Configure here.
| region: eu-central-1 | ||
| - input_cost_per_token: 1.8e-7 | ||
| output_cost_per_token: 7.8e-7 | ||
| region: ap-southeast-3 |
There was a problem hiding this comment.
Nemotron 30B wrong regional rates
Medium Severity
eu-north-1, eu-central-1, and ap-southeast-3 were added with input_cost_per_token 1.8e-7 and output_cost_per_token 7.8e-7, roughly 2.5× this model’s eu-west-1 rates (7e-8 / 2.8e-7). The sibling nvidia.nemotron-nano-12b-v2 YAML uses the same per-token values for those regions as for eu-west-1.
Reviewed by Cursor Bugbot for commit 1018200. Configure here.
| - function_calling | ||
| - tool_choice | ||
| - system_messages | ||
| - json_output |
There was a problem hiding this comment.
Pixtral US drops json_output
Low Severity
This commit removes the json_output feature from us.mistral.pixtral-large-2502-v1:0 while eu.mistral.pixtral-large-2502-v1:0 still declares json_output. Gateways that gate JSON response behavior on features may disable JSON output for the US inference profile only.
Reviewed by Cursor Bugbot for commit 1018200. Configure here.
Gateway test results
Failures (5)
ErrorCode snippetimport boto3
from botocore.config import Config
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_model = "test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0"
client = boto3.client(
"bedrock-runtime",
region_name="us-east-1",
endpoint_url=_endpoint,
aws_access_key_id="dummy",
aws_secret_access_key="dummy",
config=Config(inject_host_prefix=False),
)
def _add_auth_header(request, **kwargs):
request.headers["x-tfy-api-key"] = _api_key
client.meta.events.register("before-sign.bedrock-runtime.*", _add_auth_header)
messages = [
{"role": "user", "content": [{"text": "Hi"}]},
{"role": "assistant", "content": [{"text": "Hi, how can I help you"}]},
{"role": "user", "content": [{"text": "How to calculate 3^3^3^3? Think step by step and show all reasoning."}]},
]
system = [{"text": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."}]
response = client.converse(
modelId=_model,
system=system,
messages=messages,
)
_content = response["output"]["message"]["content"]
for _block in _content:
if "reasoningContent" in _block:
print(_block["reasoningContent"]["reasoningText"]["text"])
if "text" in _block:
print(_block["text"])
_content = response["output"]["message"]["content"]
_reasoning_detected = False
for _block in _content:
if "reasoningContent" in _block:
_reasoning_detected = True
_usage = response.get("usage", {})
if _usage.get("reasoning_tokens") or _usage.get("reasoningTokens"):
_reasoning_detected = True
if not _reasoning_detected:
print("Response: ", response)
raise Exception("VALIDATION FAILED: reasoning - no reasoning information in Bedrock response")
print("VALIDATION: reasoning SUCCESS")Output
ErrorCode snippetfrom openai import OpenAI
client = OpenAI(api_key="***", base_url="https://internal.devtest.truefoundry.tech/api/llm")
response = client.chat.completions.create(
model="test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0",
messages=[
{"role": "system", "content": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hi, how can I help you"},
{"role": "user", "content": "How to calculate 3^3^3^3? Think step by step and show all reasoning."},
],
reasoning_effort="medium",
stream=False,
)
_usage = getattr(response, "usage", None)
_reasoning_detected = False
_choices = getattr(response, "choices", None)
if _choices and len(_choices) > 0:
_message = getattr(_choices[0], "message", None)
else:
_message = None
if _message and getattr(_message, "content", None) is not None:
print(_message.content)
if _usage is not None:
_output_token_details = getattr(_usage, "completion_tokens_details", None)
if _output_token_details and getattr(_output_token_details, "reasoning_tokens", 0) > 0:
_reasoning_detected = True
elif getattr(_usage, "reasoning", None) is not None:
_reasoning_detected = True
if getattr(_message, "reasoning_content", None) is not None:
_reasoning_detected = True
elif getattr(_message, "reasoning", None) is not None:
_reasoning_detected = True
if not _reasoning_detected:
print("Response: ", response)
raise Exception("VALIDATION FAILED: reasoning - no reasoning information in response")
print("VALIDATION: reasoning SUCCESS")Output
ErrorCode snippetimport boto3
from botocore.config import Config
_endpoint = "https://internal.devtest.truefoundry.tech/api/llm"
_api_key = "***"
_model = "test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0"
client = boto3.client(
"bedrock-runtime",
region_name="us-east-1",
endpoint_url=_endpoint,
aws_access_key_id="dummy",
aws_secret_access_key="dummy",
config=Config(inject_host_prefix=False),
)
def _add_auth_header(request, **kwargs):
request.headers["x-tfy-api-key"] = _api_key
client.meta.events.register("before-sign.bedrock-runtime.*", _add_auth_header)
messages = [
{"role": "user", "content": [{"text": "Hi"}]},
{"role": "assistant", "content": [{"text": "Hi, how can I help you"}]},
{"role": "user", "content": [{"text": "How to calculate 3^3^3^3? Think step by step and show all reasoning."}]},
]
system = [{"text": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."}]
response = client.converse_stream(
modelId=_model,
system=system,
messages=messages,
)
_events = []
for _event in response["stream"]:
_events.append(_event)
if "contentBlockDelta" in _event:
_delta = _event["contentBlockDelta"].get("delta", {})
if "reasoningContent" in _delta:
print(_delta["reasoningContent"].get("text", ""), end="", flush=True)
if "text" in _delta:
print(_delta["text"], end="", flush=True)
_reasoning_detected = False
for _event in _events:
if "contentBlockDelta" in _event:
_delta = _event["contentBlockDelta"].get("delta", {})
if "reasoningContent" in _delta:
_reasoning_detected = True
if "contentBlockStart" in _event:
_start = _event["contentBlockStart"].get("start", {})
if "reasoningContent" in _start:
_reasoning_detected = True
if "metadata" in _event:
_usage = _event["metadata"].get("usage", {})
if _usage.get("reasoning_tokens") or _usage.get("reasoningTokens"):
_reasoning_detected = True
if not _reasoning_detected:
raise Exception("VALIDATION FAILED: reasoning stream - no reasoning information in Bedrock stream")
print("\nVALIDATION: reasoning stream SUCCESS")Output
ErrorCode snippetfrom openai import OpenAI
client = OpenAI(api_key="***", base_url="https://internal.devtest.truefoundry.tech/api/llm")
response = client.chat.completions.create(
model="test-v2-aws-bedrock/us.amazon.nova-2-lite-v1-0",
messages=[
{"role": "system", "content": "You are a helpful assistant. You MUST think step by step and show your reasoning. Never skip reasoning steps."},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hi, how can I help you"},
{"role": "user", "content": "How to calculate 3^3^3^3? Think step by step and show all reasoning."},
],
reasoning_effort="medium",
stream=True,
)
_reasoning_detected = False
for chunk in response:
if chunk.choices and len(chunk.choices) > 0:
delta = chunk.choices[0].delta
if delta.content is not None:
print(delta.content, end="", flush=True)
if getattr(delta, "reasoning_content", None) is not None:
_reasoning_detected = True
if getattr(delta, "reasoning", None) is not None:
_reasoning_detected = True
_usage = getattr(chunk, "usage", None)
if _usage is not None:
_details = getattr(_usage, "completion_tokens_details", None)
if _details and getattr(_details, "reasoning_tokens", 0) > 0:
_reasoning_detected = True
if not _reasoning_detected:
raise Exception("VALIDATION FAILED: reasoning stream - no reasoning information in stream")
print("\nVALIDATION: reasoning stream SUCCESS")Output
ErrorCode snippetfrom openai import OpenAI
client = OpenAI(api_key="***", base_url="https://internal.devtest.truefoundry.tech/api/llm")
tools = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather for a location.",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name, e.g. London",
},
},
"required": ["location"],
"additionalProperties": False,
},
"strict": True,
},
},
]
response = client.chat.completions.create(
model="test-v2-aws-bedrock/apac.amazon.nova-micro-v1-0",
messages=[
{"role": "system", "content": "You are a helpful assistant with access to tools. You MUST strictly use the provided tools to answer. Never respond with plain text when a tool is available."},
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hi, how can I help you"},
{"role": "user", "content": "Use the get_weather tool to check the weather in London. You must call the tool, do not respond with plain text."},
],
tools=tools,
tool_choice="auto",
stream=False,
)
_message = response.choices[0].message
if _message.tool_calls:
for _tc in _message.tool_calls:
print(f"Function: {_tc.function.name}")
print(f"Arguments: {_tc.function.arguments}")
else:
print(_message.content)
if not _message.tool_calls or len(_message.tool_calls) == 0:
raise Exception("VALIDATION FAILED: tool-call - no tool calls in response")
print("VALIDATION: tool-call SUCCESS")Successes (180)
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Output
Skipped (14)
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason
Skip reason |


Auto-generated by poc-agent for provider aws-bedrock.
Note
Medium Risk
Changes affect cost estimation and advertised capabilities across many models; incorrect pricing or feature flags could mislead billing and routing, though there is no application logic in the diff.
Overview
Auto-generated refresh of AWS Bedrock provider model YAMLs: pricing, capabilities, param stripping, and doc links.
Param handling: Many chat and non-chat models now list `reasoning_effort` under `removeParams` so callers cannot send an unsupported reasoning knob (Nova micro variants, Gemma, Llama, Qwen coders, Pixtral, rerank, Luma video, etc.). Rerank also gains extra AWS doc sources.
Costs & regions: AU Claude Sonnet 4.5 cost blocks add `cache_creation_input_token_cost_per_hour`. Global Claude Sonnet 4.5 adds a full `ap-southeast-6` tiered pricing entry. Gemma 3 27B, Ministral 3 3B, and Nemotron Nano 3 30B gain new regional `costs` rows (e.g. `eu-central-1`, `eu-north-1`, `ap-southeast-3`). Qwen3 Coder 480B adds region placeholders (some annotated as not in official docs); Qwen3 Coder 30B adjusts `ap-southeast-2` token rates.
Features & limits: Claude Opus 4.6 adds `parallel_function_calling`, `cache_control`, and `assistant_prefill`. Claude Sonnet 4.6 (EU/US) bumps `tool_use_system_prompt_tokens` from 346 to 497. Mistral Large 3 adds `assistant_prefill`; Mistral Small 2402 adds `tool_choice` and `system_messages`. Pixtral Large drops `json_output`. Cohere embed English v3 removes image from input modalities. EU Nova micro trims some sources and adds `removeParams` for `reasoning_effort`.
Reviewed by Cursor Bugbot for commit 1018200. Bugbot is set up for automated code reviews on this repo. Configure here.