Skip to content

Commit 319a400

Browse files
committed
fix: GPT-5.4 Responses API routing, context window limits, and reasoning effort lookup
- Route gpt-5.4 variants and codex/pro models to the Responses API (not chat/completions) for both the OpenAI and Databricks providers
- Add strict:false to Responses API function tool definitions
- Handle databricks- prefix stripping for routing decisions
- Handle dash-variant model names (gpt-5-4 == gpt-5.4)
- Fix OPEN_AI_KNOWN_MODELS context windows: gpt-5.4/pro to 1,050,000, gpt-5.4-nano to 400,000
- Add missing gpt-5.3-codex to known models
- Fix canonical lookup: strip reasoning effort suffixes (-high, -xhigh, etc.) before retrying, so models like databricks-gpt-5.4-high get correct context limits instead of falling back to the 128k default

Signed-off-by: Bradley Axen <baxen@squareup.com>
1 parent b1235e7 commit 319a400

7 files changed

Lines changed: 404 additions & 111 deletions

File tree

crates/goose/src/model.rs

Lines changed: 38 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,22 @@ impl ModelConfig {
138138
}
139139
}
140140

141-
if let Some(canonical) =
141+
// Try canonical lookup with the full model name first, then fall back
142+
// to the name with reasoning-effort suffixes stripped (e.g.
143+
// "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
144+
let canonical =
142145
crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
143-
{
146+
.or_else(|| {
147+
let (base, _effort) =
148+
crate::providers::utils::extract_reasoning_effort(&self.model_name);
149+
if base != self.model_name {
150+
crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
151+
} else {
152+
None
153+
}
154+
});
155+
156+
if let Some(canonical) = canonical {
144157
if self.context_limit.is_none() {
145158
self.context_limit = Some(canonical.limit.context);
146159
}
@@ -299,15 +312,7 @@ impl ModelConfig {
299312
}
300313

301314
pub fn is_openai_reasoning_model(&self) -> bool {
302-
const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
303-
const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
304-
305-
let base = DATABRICKS_MODEL_NAME_PREFIXES
306-
.iter()
307-
.find_map(|p| self.model_name.strip_prefix(p))
308-
.unwrap_or(&self.model_name);
309-
310-
REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
315+
crate::providers::utils::is_openai_responses_model(&self.model_name)
311316
}
312317

313318
pub fn max_output_tokens(&self) -> i32 {
@@ -499,6 +504,28 @@ mod tests {
499504
assert_eq!(config.max_tokens, None);
500505
assert_eq!(config.reasoning, None);
501506
}
507+
508+
#[test]
509+
fn resolves_after_stripping_reasoning_effort_suffix() {
510+
let _guard = env_lock::lock_env([
511+
("GOOSE_MAX_TOKENS", None::<&str>),
512+
("GOOSE_CONTEXT_LIMIT", None::<&str>),
513+
]);
514+
515+
// "databricks-gpt-5.4-high" should resolve via "databricks-gpt-5.4"
516+
let config = ModelConfig::new_or_fail("databricks-gpt-5.4-high")
517+
.with_canonical_limits("databricks");
518+
assert_eq!(config.context_limit, Some(1_050_000));
519+
520+
// "gpt-5.4-xhigh" should resolve via "gpt-5.4"
521+
let config = ModelConfig::new_or_fail("gpt-5.4-xhigh").with_canonical_limits("openai");
522+
assert_eq!(config.context_limit, Some(1_050_000));
523+
524+
// "gpt-5.4-nano-low" should resolve via "gpt-5.4-nano"
525+
let config =
526+
ModelConfig::new_or_fail("gpt-5.4-nano-low").with_canonical_limits("openai");
527+
assert_eq!(config.context_limit, Some(400_000));
528+
}
502529
}
503530

504531
mod is_openai_reasoning_model {

crates/goose/src/providers/databricks.rs

Lines changed: 52 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -269,17 +269,19 @@ impl DatabricksProvider {
269269
}
270270

271271
fn is_responses_model(model_name: &str) -> bool {
272-
let normalized = model_name.to_ascii_lowercase();
273-
normalized.contains("codex")
272+
super::utils::is_openai_responses_model(model_name)
274273
}
275274

276275
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
277276
if is_embedding {
278277
"serving-endpoints/text-embedding-3-small/invocations".to_string()
279-
} else if Self::is_responses_model(model_name) {
280-
"serving-endpoints/responses".to_string()
281278
} else {
282-
format!("serving-endpoints/{}/invocations", model_name)
279+
let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
280+
if Self::is_responses_model(&clean_name) {
281+
"serving-endpoints/responses".to_string()
282+
} else {
283+
format!("serving-endpoints/{}/invocations", clean_name)
284+
}
283285
}
284286
}
285287

@@ -594,3 +596,48 @@ impl EmbeddingCapable for DatabricksProvider {
594596
Ok(embeddings)
595597
}
596598
}
599+
600+
#[cfg(test)]
601+
mod tests {
602+
use super::*;
603+
604+
fn test_provider() -> DatabricksProvider {
605+
DatabricksProvider {
606+
api_client: super::super::api_client::ApiClient::new(
607+
"https://example.com".to_string(),
608+
super::super::api_client::AuthMethod::NoAuth,
609+
)
610+
.unwrap(),
611+
auth: DatabricksAuth::Token("fake".into()),
612+
model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
613+
image_format: ImageFormat::OpenAi,
614+
retry_config: RetryConfig::default(),
615+
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
616+
name: "databricks".into(),
617+
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
618+
instance_id: None,
619+
}
620+
}
621+
622+
#[test]
623+
fn responses_models_route_to_responses_endpoint() {
624+
let provider = test_provider();
625+
626+
for (model_name, expected_path) in [
627+
("gpt-5.4", "serving-endpoints/responses"),
628+
("databricks-gpt-5.4-high", "serving-endpoints/responses"),
629+
("databricks-gpt-5-4-xhigh", "serving-endpoints/responses"),
630+
("o3-mini", "serving-endpoints/responses"),
631+
(
632+
"databricks-claude-sonnet-4",
633+
"serving-endpoints/databricks-claude-sonnet-4/invocations",
634+
),
635+
] {
636+
assert_eq!(
637+
provider.get_endpoint_path(model_name, false),
638+
expected_path,
639+
"unexpected endpoint for {model_name}"
640+
);
641+
}
642+
}
643+
}

crates/goose/src/providers/formats/databricks.rs

Lines changed: 61 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ use crate::conversation::message::{Message, MessageContent};
22
use crate::model::ModelConfig;
33
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
44
use crate::providers::utils::{
5-
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
6-
sanitize_function_name, ImageFormat,
5+
convert_image, detect_image_path, extract_reasoning_effort, is_valid_function_name,
6+
load_image_file, safely_parse_json, sanitize_function_name, ImageFormat,
77
};
88
use anyhow::{anyhow, Error};
99
use rmcp::model::{
@@ -581,24 +581,8 @@ pub fn create_request(
581581
));
582582
}
583583

584-
let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
585-
let (model_name, reasoning_effort) = if is_openai_reasoning_model {
586-
let parts: Vec<&str> = model_config.model_name.split('-').collect();
587-
let last_part = parts.last().unwrap();
588-
589-
match *last_part {
590-
"low" | "medium" | "high" => {
591-
let base_name = parts[..parts.len() - 1].join("-");
592-
(base_name, Some(last_part.to_string()))
593-
}
594-
_ => (
595-
model_config.model_name.to_string(),
596-
Some("medium".to_string()),
597-
),
598-
}
599-
} else {
600-
(model_config.model_name.to_string(), None)
601-
};
584+
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
585+
let is_openai_reasoning_model = reasoning_effort.is_some();
602586

603587
let system_message = DatabricksMessage {
604588
role: "system".to_string(),
@@ -1073,6 +1057,63 @@ mod tests {
10731057
Ok(())
10741058
}
10751059

1060+
#[test]
1061+
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
1062+
let model_config = ModelConfig {
1063+
model_name: "o3-xhigh".to_string(),
1064+
context_limit: Some(4096),
1065+
temperature: None,
1066+
max_tokens: Some(1024),
1067+
toolshim: false,
1068+
toolshim_model: None,
1069+
fast_model_config: None,
1070+
request_params: None,
1071+
reasoning: None,
1072+
};
1073+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1074+
assert_eq!(request["model"], "o3");
1075+
assert_eq!(request["reasoning_effort"], "xhigh");
1076+
Ok(())
1077+
}
1078+
1079+
#[test]
1080+
fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
1081+
let model_config = ModelConfig {
1082+
model_name: "o3-none".to_string(),
1083+
context_limit: Some(4096),
1084+
temperature: None,
1085+
max_tokens: Some(1024),
1086+
toolshim: false,
1087+
toolshim_model: None,
1088+
fast_model_config: None,
1089+
request_params: None,
1090+
reasoning: None,
1091+
};
1092+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1093+
assert_eq!(request["model"], "o3");
1094+
assert_eq!(request["reasoning_effort"], "none");
1095+
Ok(())
1096+
}
1097+
1098+
#[test]
1099+
fn test_create_request_reasoning_effort_for_prefixed_gpt5_model() -> anyhow::Result<()> {
1100+
let model_config = ModelConfig {
1101+
model_name: "databricks-gpt-5.4-high".to_string(),
1102+
context_limit: Some(4096),
1103+
temperature: None,
1104+
max_tokens: Some(1024),
1105+
toolshim: false,
1106+
toolshim_model: None,
1107+
fast_model_config: None,
1108+
request_params: None,
1109+
reasoning: None,
1110+
};
1111+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1112+
assert_eq!(request["model"], "databricks-gpt-5.4");
1113+
assert_eq!(request["reasoning_effort"], "high");
1114+
Ok(())
1115+
}
1116+
10761117
#[test]
10771118
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
10781119
let _guard = env_lock::lock_env([

crates/goose/src/providers/formats/openai_responses.rs

Lines changed: 75 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -495,6 +495,7 @@ pub fn create_responses_request(
495495
"name": tool.name,
496496
"description": tool.description,
497497
"parameters": tool.input_schema,
498+
"strict": false,
498499
})
499500
})
500501
.collect();
@@ -1083,4 +1084,78 @@ mod tests {
10831084
assert_eq!(info.effort.as_deref(), Some("high"));
10841085
assert_eq!(info.summary.as_deref(), Some("Thought deeply"));
10851086
}
1087+
1088+
#[test]
1089+
fn test_responses_tools_include_strict_false() {
1090+
let model_config = ModelConfig {
1091+
model_name: "gpt-5.4".to_string(),
1092+
context_limit: None,
1093+
temperature: None,
1094+
max_tokens: None,
1095+
toolshim: false,
1096+
toolshim_model: None,
1097+
fast_model_config: None,
1098+
request_params: None,
1099+
reasoning: None,
1100+
};
1101+
1102+
let tool = Tool::new(
1103+
"shell",
1104+
"Execute a shell command",
1105+
object!({
1106+
"type": "object",
1107+
"properties": {
1108+
"command": {
1109+
"type": "string",
1110+
"description": "The command to run"
1111+
}
1112+
},
1113+
"required": ["command"]
1114+
}),
1115+
);
1116+
1117+
let result =
1118+
create_responses_request(&model_config, "You are helpful.", &[], &[tool]).unwrap();
1119+
let tools = result["tools"]
1120+
.as_array()
1121+
.expect("tools should be an array");
1122+
assert_eq!(tools.len(), 1);
1123+
assert_eq!(tools[0]["strict"], json!(false),
1124+
"Responses API defaults strict to true, but MCP tool schemas are not strict-compatible; must explicitly set strict: false");
1125+
}
1126+
1127+
#[test]
1128+
fn test_responses_request_extracts_reasoning_effort_for_openai_and_databricks_models() {
1129+
for (model_name, expected_model, expected_effort) in [
1130+
("gpt-5.4", "gpt-5.4", "medium"),
1131+
("gpt-5.4-xhigh", "gpt-5.4", "xhigh"),
1132+
("databricks-gpt-5.4-high", "databricks-gpt-5.4", "high"),
1133+
("databricks-o3-none", "databricks-o3", "none"),
1134+
] {
1135+
let model_config = ModelConfig {
1136+
model_name: model_name.to_string(),
1137+
context_limit: None,
1138+
temperature: None,
1139+
max_tokens: None,
1140+
toolshim: false,
1141+
toolshim_model: None,
1142+
fast_model_config: None,
1143+
request_params: None,
1144+
reasoning: None,
1145+
};
1146+
1147+
let result =
1148+
create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap();
1149+
1150+
assert_eq!(
1151+
result["model"], expected_model,
1152+
"unexpected model for {model_name}"
1153+
);
1154+
assert_eq!(
1155+
result["reasoning"]["effort"], expected_effort,
1156+
"unexpected effort for {model_name}"
1157+
);
1158+
assert_eq!(result["reasoning"]["summary"], "auto");
1159+
}
1160+
}
10861161
}

0 commit comments

Comments (0)