Skip to content

Commit 7ddfee7

Browse files
baxenspikewang
authored and committed
fix: add strict:false to Responses API tools and gpt-5.4 to known models (aaif-goose#8636)
Signed-off-by: Bradley Axen <baxen@squareup.com>
1 parent 22afde0 commit 7ddfee7

11 files changed

Lines changed: 474 additions & 130 deletions

File tree

crates/goose-test-support/src/session.rs

Lines changed: 6 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,7 +1,12 @@
11
use std::sync::{Arc, Mutex};
22

33
pub const TEST_SESSION_ID: &str = "test-session-id";
4-
pub const TEST_MODEL: &str = "gpt-5-nano";
4+
// Use a Chat Completions model so the canned SSE fixtures (which return
5+
// Chat Completions format) are parsed correctly. gpt-5-nano now routes to
6+
// the Responses API which needs a different mock format.
7+
// TODO: add a Responses API mock to OpenAiFixture so tests can cover
8+
// responses-routed models like gpt-5-nano end-to-end.
9+
pub const TEST_MODEL: &str = "gpt-4.1";
510

611
const NOT_YET_SET: &str = "session-id-not-yet-set";
712
pub(crate) const SESSION_ID_HEADER: &str = "agent-session-id";

crates/goose/src/model.rs

Lines changed: 38 additions & 11 deletions
Original file line number · Diff line number · Diff line change
@@ -138,9 +138,22 @@ impl ModelConfig {
138138
}
139139
}
140140

141-
if let Some(canonical) =
141+
// Try canonical lookup with the full model name first, then fall back
142+
// to the name with reasoning-effort suffixes stripped (e.g.
143+
// "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
144+
let canonical =
142145
crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
143-
{
146+
.or_else(|| {
147+
let (base, _effort) =
148+
crate::providers::utils::extract_reasoning_effort(&self.model_name);
149+
if base != self.model_name {
150+
crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
151+
} else {
152+
None
153+
}
154+
});
155+
156+
if let Some(canonical) = canonical {
144157
if self.context_limit.is_none() {
145158
self.context_limit = Some(canonical.limit.context);
146159
}
@@ -299,15 +312,7 @@ impl ModelConfig {
299312
}
300313

301314
pub fn is_openai_reasoning_model(&self) -> bool {
302-
const DATABRICKS_MODEL_NAME_PREFIXES: &[&str] = &["goose-", "databricks-"];
303-
const REASONING_PREFIXES: &[&str] = &["o1", "o3", "o4", "gpt-5"];
304-
305-
let base = DATABRICKS_MODEL_NAME_PREFIXES
306-
.iter()
307-
.find_map(|p| self.model_name.strip_prefix(p))
308-
.unwrap_or(&self.model_name);
309-
310-
REASONING_PREFIXES.iter().any(|p| base.starts_with(p))
315+
crate::providers::utils::is_openai_responses_model(&self.model_name)
311316
}
312317

313318
pub fn max_output_tokens(&self) -> i32 {
@@ -499,6 +504,28 @@ mod tests {
499504
assert_eq!(config.max_tokens, None);
500505
assert_eq!(config.reasoning, None);
501506
}
507+
508+
#[test]
509+
fn resolves_after_stripping_reasoning_effort_suffix() {
510+
let _guard = env_lock::lock_env([
511+
("GOOSE_MAX_TOKENS", None::<&str>),
512+
("GOOSE_CONTEXT_LIMIT", None::<&str>),
513+
]);
514+
515+
// "databricks-gpt-5.4-high" should resolve via "databricks-gpt-5.4"
516+
let config = ModelConfig::new_or_fail("databricks-gpt-5.4-high")
517+
.with_canonical_limits("databricks");
518+
assert_eq!(config.context_limit, Some(1_050_000));
519+
520+
// "gpt-5.4-xhigh" should resolve via "gpt-5.4"
521+
let config = ModelConfig::new_or_fail("gpt-5.4-xhigh").with_canonical_limits("openai");
522+
assert_eq!(config.context_limit, Some(1_050_000));
523+
524+
// "gpt-5.4-nano-low" should resolve via "gpt-5.4-nano"
525+
let config =
526+
ModelConfig::new_or_fail("gpt-5.4-nano-low").with_canonical_limits("openai");
527+
assert_eq!(config.context_limit, Some(400_000));
528+
}
502529
}
503530

504531
mod is_openai_reasoning_model {

crates/goose/src/providers/databricks.rs

Lines changed: 52 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -269,17 +269,19 @@ impl DatabricksProvider {
269269
}
270270

271271
fn is_responses_model(model_name: &str) -> bool {
272-
let normalized = model_name.to_ascii_lowercase();
273-
normalized.contains("codex")
272+
super::utils::is_openai_responses_model(model_name)
274273
}
275274

276275
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
277276
if is_embedding {
278277
"serving-endpoints/text-embedding-3-small/invocations".to_string()
279-
} else if Self::is_responses_model(model_name) {
280-
"serving-endpoints/responses".to_string()
281278
} else {
282-
format!("serving-endpoints/{}/invocations", model_name)
279+
let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
280+
if Self::is_responses_model(&clean_name) {
281+
"serving-endpoints/responses".to_string()
282+
} else {
283+
format!("serving-endpoints/{}/invocations", clean_name)
284+
}
283285
}
284286
}
285287

@@ -594,3 +596,48 @@ impl EmbeddingCapable for DatabricksProvider {
594596
Ok(embeddings)
595597
}
596598
}
599+
600+
#[cfg(test)]
601+
mod tests {
602+
use super::*;
603+
604+
fn test_provider() -> DatabricksProvider {
605+
DatabricksProvider {
606+
api_client: super::super::api_client::ApiClient::new(
607+
"https://example.com".to_string(),
608+
super::super::api_client::AuthMethod::NoAuth,
609+
)
610+
.unwrap(),
611+
auth: DatabricksAuth::Token("fake".into()),
612+
model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
613+
image_format: ImageFormat::OpenAi,
614+
retry_config: RetryConfig::default(),
615+
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
616+
name: "databricks".into(),
617+
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
618+
instance_id: None,
619+
}
620+
}
621+
622+
#[test]
623+
fn responses_models_route_to_responses_endpoint() {
624+
let provider = test_provider();
625+
626+
for (model_name, expected_path) in [
627+
("gpt-5.4", "serving-endpoints/responses"),
628+
("databricks-gpt-5.4-high", "serving-endpoints/responses"),
629+
("databricks-gpt-5-4-xhigh", "serving-endpoints/responses"),
630+
("o3-mini", "serving-endpoints/responses"),
631+
(
632+
"databricks-claude-sonnet-4",
633+
"serving-endpoints/databricks-claude-sonnet-4/invocations",
634+
),
635+
] {
636+
assert_eq!(
637+
provider.get_endpoint_path(model_name, false),
638+
expected_path,
639+
"unexpected endpoint for {model_name}"
640+
);
641+
}
642+
}
643+
}

crates/goose/src/providers/formats/databricks.rs

Lines changed: 62 additions & 20 deletions
Original file line number · Diff line number · Diff line change
@@ -2,8 +2,9 @@ use crate::conversation::message::{Message, MessageContent};
22
use crate::model::ModelConfig;
33
use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType};
44
use crate::providers::utils::{
5-
convert_image, detect_image_path, is_valid_function_name, load_image_file, safely_parse_json,
6-
sanitize_function_name, ImageFormat,
5+
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
6+
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
7+
ImageFormat,
78
};
89
use anyhow::{anyhow, Error};
910
use rmcp::model::{
@@ -581,24 +582,8 @@ pub fn create_request(
581582
));
582583
}
583584

584-
let is_openai_reasoning_model = model_config.is_openai_reasoning_model();
585-
let (model_name, reasoning_effort) = if is_openai_reasoning_model {
586-
let parts: Vec<&str> = model_config.model_name.split('-').collect();
587-
let last_part = parts.last().unwrap();
588-
589-
match *last_part {
590-
"low" | "medium" | "high" => {
591-
let base_name = parts[..parts.len() - 1].join("-");
592-
(base_name, Some(last_part.to_string()))
593-
}
594-
_ => (
595-
model_config.model_name.to_string(),
596-
Some("medium".to_string()),
597-
),
598-
}
599-
} else {
600-
(model_config.model_name.to_string(), None)
601-
};
585+
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
586+
let is_openai_reasoning_model = is_openai_responses_model(&model_name);
602587

603588
let system_message = DatabricksMessage {
604589
role: "system".to_string(),
@@ -1073,6 +1058,63 @@ mod tests {
10731058
Ok(())
10741059
}
10751060

1061+
#[test]
1062+
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
1063+
let model_config = ModelConfig {
1064+
model_name: "o3-xhigh".to_string(),
1065+
context_limit: Some(4096),
1066+
temperature: None,
1067+
max_tokens: Some(1024),
1068+
toolshim: false,
1069+
toolshim_model: None,
1070+
fast_model_config: None,
1071+
request_params: None,
1072+
reasoning: None,
1073+
};
1074+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1075+
assert_eq!(request["model"], "o3");
1076+
assert_eq!(request["reasoning_effort"], "xhigh");
1077+
Ok(())
1078+
}
1079+
1080+
#[test]
1081+
fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
1082+
let model_config = ModelConfig {
1083+
model_name: "o3-none".to_string(),
1084+
context_limit: Some(4096),
1085+
temperature: None,
1086+
max_tokens: Some(1024),
1087+
toolshim: false,
1088+
toolshim_model: None,
1089+
fast_model_config: None,
1090+
request_params: None,
1091+
reasoning: None,
1092+
};
1093+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1094+
assert_eq!(request["model"], "o3");
1095+
assert_eq!(request["reasoning_effort"], "none");
1096+
Ok(())
1097+
}
1098+
1099+
#[test]
1100+
fn test_create_request_reasoning_effort_for_prefixed_gpt5_model() -> anyhow::Result<()> {
1101+
let model_config = ModelConfig {
1102+
model_name: "databricks-gpt-5.4-high".to_string(),
1103+
context_limit: Some(4096),
1104+
temperature: None,
1105+
max_tokens: Some(1024),
1106+
toolshim: false,
1107+
toolshim_model: None,
1108+
fast_model_config: None,
1109+
request_params: None,
1110+
reasoning: None,
1111+
};
1112+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1113+
assert_eq!(request["model"], "databricks-gpt-5.4");
1114+
assert_eq!(request["reasoning_effort"], "high");
1115+
Ok(())
1116+
}
1117+
10761118
#[test]
10771119
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
10781120
let _guard = env_lock::lock_env([

crates/goose/src/providers/formats/openai.rs

Lines changed: 10 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -4,8 +4,9 @@ use crate::model::ModelConfig;
44
use crate::providers::base::{ProviderUsage, Usage};
55
use crate::providers::errors::ProviderError;
66
use crate::providers::utils::{
7-
convert_image, detect_image_path, extract_reasoning_effort, is_valid_function_name,
8-
load_image_file, safely_parse_json, sanitize_function_name, ImageFormat,
7+
convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model,
8+
is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name,
9+
ImageFormat,
910
};
1011
use anyhow::{anyhow, Error};
1112
use async_stream::try_stream;
@@ -984,7 +985,7 @@ pub fn create_request(
984985
}
985986

986987
let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name);
987-
let is_reasoning_model = reasoning_effort.is_some();
988+
let is_reasoning_model = is_openai_responses_model(&model_name);
988989

989990
let system_message = json!({
990991
"role": if is_reasoning_model { "developer" } else { "system" },
@@ -1716,7 +1717,8 @@ mod tests {
17161717

17171718
#[test]
17181719
fn test_create_request_o1_default() -> anyhow::Result<()> {
1719-
// Test default medium reasoning effort for O1 model
1720+
// Without an explicit effort suffix the API picks its own default;
1721+
// we should omit reasoning_effort entirely but still use "developer" role.
17201722
let model_config = ModelConfig {
17211723
model_name: "o1".to_string(),
17221724
context_limit: Some(4096),
@@ -1745,13 +1747,16 @@ mod tests {
17451747
"content": "system"
17461748
}
17471749
],
1748-
"reasoning_effort": "medium",
17491750
"max_completion_tokens": 1024
17501751
});
17511752

17521753
for (key, value) in expected.as_object().unwrap() {
17531754
assert_eq!(obj.get(key).unwrap(), value);
17541755
}
1756+
assert!(
1757+
obj.get("reasoning_effort").is_none(),
1758+
"reasoning_effort should be omitted when no explicit suffix is provided"
1759+
);
17551760

17561761
Ok(())
17571762
}

0 commit comments

Comments (0)