Skip to content

Commit 36a396a

Browse files
committed
fix: GPT-5.4 Responses API routing, context window limits, and reasoning effort lookup
- Route gpt-5.4 variants and codex/pro models to Responses API (not chat/completions) for both OpenAI and Databricks providers
- Add strict:false to Responses API function tool definitions
- Handle databricks- prefix stripping for routing decisions
- Handle dash-variant model names (gpt-5-4 == gpt-5.4)
- Fix OPEN_AI_KNOWN_MODELS context windows: gpt-5.4/pro to 1,050,000, gpt-5.4-nano to 400,000
- Add missing gpt-5.3-codex to known models
- Fix canonical lookup: strip reasoning effort suffixes (-high, -xhigh, etc.) before retrying, so models like databricks-gpt-5.4-high get correct context limits instead of falling back to 128k default

Signed-off-by: Bradley Axen <baxen@squareup.com>
1 parent b7aea3d commit 36a396a

6 files changed

Lines changed: 635 additions & 17 deletions

File tree

crates/goose/src/model.rs

Lines changed: 37 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,22 @@ impl ModelConfig {
138138
}
139139
}
140140

141-
if let Some(canonical) =
141+
// Try canonical lookup with the full model name first, then fall back
142+
// to the name with reasoning-effort suffixes stripped (e.g.
143+
// "databricks-gpt-5.4-high" → "databricks-gpt-5.4").
144+
let canonical =
142145
crate::providers::canonical::maybe_get_canonical_model(provider_name, &self.model_name)
143-
{
146+
.or_else(|| {
147+
let (base, _effort) =
148+
crate::providers::utils::extract_reasoning_effort(&self.model_name);
149+
if base != self.model_name {
150+
crate::providers::canonical::maybe_get_canonical_model(provider_name, &base)
151+
} else {
152+
None
153+
}
154+
});
155+
156+
if let Some(canonical) = canonical {
144157
if self.context_limit.is_none() {
145158
self.context_limit = Some(canonical.limit.context);
146159
}
@@ -499,6 +512,28 @@ mod tests {
499512
assert_eq!(config.max_tokens, None);
500513
assert_eq!(config.reasoning, None);
501514
}
515+
516+
#[test]
517+
fn resolves_after_stripping_reasoning_effort_suffix() {
518+
let _guard = env_lock::lock_env([
519+
("GOOSE_MAX_TOKENS", None::<&str>),
520+
("GOOSE_CONTEXT_LIMIT", None::<&str>),
521+
]);
522+
523+
// "databricks-gpt-5.4-high" should resolve via "databricks-gpt-5.4"
524+
let config = ModelConfig::new_or_fail("databricks-gpt-5.4-high")
525+
.with_canonical_limits("databricks");
526+
assert_eq!(config.context_limit, Some(1_050_000));
527+
528+
// "gpt-5.4-xhigh" should resolve via "gpt-5.4"
529+
let config = ModelConfig::new_or_fail("gpt-5.4-xhigh").with_canonical_limits("openai");
530+
assert_eq!(config.context_limit, Some(1_050_000));
531+
532+
// "gpt-5.4-nano-low" should resolve via "gpt-5.4-nano"
533+
let config =
534+
ModelConfig::new_or_fail("gpt-5.4-nano-low").with_canonical_limits("openai");
535+
assert_eq!(config.context_limit, Some(400_000));
536+
}
502537
}
503538

504539
mod is_openai_reasoning_model {

crates/goose/src/providers/databricks.rs

Lines changed: 260 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -270,16 +270,26 @@ impl DatabricksProvider {
270270

271271
fn is_responses_model(model_name: &str) -> bool {
272272
let normalized = model_name.to_ascii_lowercase();
273-
normalized.contains("codex")
273+
let base = normalized
274+
.strip_prefix("databricks-")
275+
.unwrap_or(&normalized);
276+
let base = super::utils::normalize_gpt5_version(base);
277+
base.contains("codex")
278+
|| base.starts_with("gpt-5-pro")
279+
|| base.starts_with("gpt-5.2-pro")
280+
|| base.starts_with("gpt-5.4")
274281
}
275282

276283
fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String {
277284
if is_embedding {
278285
"serving-endpoints/text-embedding-3-small/invocations".to_string()
279-
} else if Self::is_responses_model(model_name) {
280-
"serving-endpoints/responses".to_string()
281286
} else {
282-
format!("serving-endpoints/{}/invocations", model_name)
287+
let (clean_name, _) = super::utils::extract_reasoning_effort(model_name);
288+
if Self::is_responses_model(&clean_name) {
289+
"serving-endpoints/responses".to_string()
290+
} else {
291+
format!("serving-endpoints/{}/invocations", clean_name)
292+
}
283293
}
284294
}
285295

@@ -590,3 +600,249 @@ impl EmbeddingCapable for DatabricksProvider {
590600
Ok(embeddings)
591601
}
592602
}
603+
604+
#[cfg(test)]
605+
mod tests {
606+
use super::*;
607+
608+
#[test]
609+
fn gpt_5_4_is_responses_model() {
610+
assert!(
611+
DatabricksProvider::is_responses_model("gpt-5.4"),
612+
"gpt-5.4 requires the Responses API — chat/completions rejects reasoning_effort with function tools"
613+
);
614+
}
615+
616+
#[test]
617+
fn gpt_5_2_pro_is_responses_model() {
618+
assert!(
619+
DatabricksProvider::is_responses_model("gpt-5.2-pro"),
620+
"gpt-5.2-pro requires the Responses API"
621+
);
622+
}
623+
624+
#[test]
625+
fn codex_models_remain_responses_models() {
626+
assert!(DatabricksProvider::is_responses_model("gpt-5-codex"));
627+
assert!(DatabricksProvider::is_responses_model("gpt-5.1-codex"));
628+
}
629+
630+
#[test]
631+
fn gpt_5_pro_is_responses_model() {
632+
assert!(
633+
DatabricksProvider::is_responses_model("gpt-5-pro"),
634+
"gpt-5-pro only supports the Responses API"
635+
);
636+
}
637+
638+
#[test]
639+
fn gpt_5_4_mini_is_responses_model() {
640+
assert!(DatabricksProvider::is_responses_model("gpt-5.4-mini"));
641+
}
642+
643+
#[test]
644+
fn gpt_5_4_nano_is_responses_model() {
645+
assert!(DatabricksProvider::is_responses_model("gpt-5.4-nano"));
646+
}
647+
648+
#[test]
649+
fn gpt_5_4_pro_is_responses_model() {
650+
assert!(DatabricksProvider::is_responses_model("gpt-5.4-pro"));
651+
}
652+
653+
#[test]
654+
fn non_responses_models_are_not_matched() {
655+
assert!(!DatabricksProvider::is_responses_model("gpt-4o"));
656+
assert!(!DatabricksProvider::is_responses_model("gpt-5"));
657+
assert!(!DatabricksProvider::is_responses_model("gpt-5-mini"));
658+
assert!(!DatabricksProvider::is_responses_model("gpt-5-nano"));
659+
assert!(!DatabricksProvider::is_responses_model("gpt-5.1"));
660+
assert!(!DatabricksProvider::is_responses_model("gpt-5.2"));
661+
assert!(!DatabricksProvider::is_responses_model("o3-mini"));
662+
assert!(!DatabricksProvider::is_responses_model("claude-sonnet-4"));
663+
}
664+
665+
// --- Bug-fix tests: databricks-prefixed model names ---
666+
667+
#[test]
668+
fn databricks_prefixed_gpt_5_4_is_responses_model() {
669+
assert!(
670+
DatabricksProvider::is_responses_model("databricks-gpt-5.4"),
671+
"databricks-gpt-5.4 should route to the Responses API"
672+
);
673+
}
674+
675+
#[test]
676+
fn databricks_prefixed_gpt_5_4_mini_is_responses_model() {
677+
assert!(DatabricksProvider::is_responses_model(
678+
"databricks-gpt-5.4-mini"
679+
));
680+
}
681+
682+
#[test]
683+
fn databricks_prefixed_gpt_5_pro_is_responses_model() {
684+
assert!(DatabricksProvider::is_responses_model(
685+
"databricks-gpt-5-pro"
686+
));
687+
}
688+
689+
#[test]
690+
fn databricks_prefixed_codex_is_responses_model() {
691+
assert!(DatabricksProvider::is_responses_model(
692+
"databricks-gpt-5-codex"
693+
));
694+
}
695+
696+
#[test]
697+
fn databricks_prefixed_non_responses_model_is_not_matched() {
698+
assert!(!DatabricksProvider::is_responses_model("databricks-gpt-4o"));
699+
assert!(!DatabricksProvider::is_responses_model(
700+
"databricks-claude-sonnet-4"
701+
));
702+
}
703+
704+
// --- Bug-fix tests: reasoning suffix must be stripped from endpoint path ---
705+
706+
#[test]
707+
fn endpoint_path_strips_reasoning_suffix_for_chat_model() {
708+
let provider = DatabricksProvider {
709+
api_client: super::super::api_client::ApiClient::new(
710+
"https://example.com".to_string(),
711+
super::super::api_client::AuthMethod::NoAuth,
712+
)
713+
.unwrap(),
714+
auth: DatabricksAuth::Token("fake".into()),
715+
model: ModelConfig::new_or_fail("databricks-gpt-5-4"),
716+
image_format: ImageFormat::OpenAi,
717+
retry_config: RetryConfig::default(),
718+
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
719+
name: "databricks".into(),
720+
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
721+
instance_id: None,
722+
};
723+
724+
// "databricks-gpt-5-4-high" — gpt-5-4 is the dash variant of gpt-5.4,
725+
// which requires the Responses API.
726+
let path = provider.get_endpoint_path("databricks-gpt-5-4-high", false);
727+
assert_eq!(
728+
path, "serving-endpoints/responses",
729+
"gpt-5-4 (dash variant of gpt-5.4) must route to the Responses API"
730+
);
731+
}
732+
733+
#[test]
734+
fn endpoint_path_routes_prefixed_responses_model_correctly() {
735+
let provider = DatabricksProvider {
736+
api_client: super::super::api_client::ApiClient::new(
737+
"https://example.com".to_string(),
738+
super::super::api_client::AuthMethod::NoAuth,
739+
)
740+
.unwrap(),
741+
auth: DatabricksAuth::Token("fake".into()),
742+
model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
743+
image_format: ImageFormat::OpenAi,
744+
retry_config: RetryConfig::default(),
745+
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
746+
name: "databricks".into(),
747+
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
748+
instance_id: None,
749+
};
750+
751+
let path = provider.get_endpoint_path("databricks-gpt-5.4-high", false);
752+
assert_eq!(
753+
path, "serving-endpoints/responses",
754+
"databricks-gpt-5.4 variants must route to the Responses API"
755+
);
756+
}
757+
758+
#[test]
759+
fn endpoint_path_unchanged_for_non_reasoning_model() {
760+
let provider = DatabricksProvider {
761+
api_client: super::super::api_client::ApiClient::new(
762+
"https://example.com".to_string(),
763+
super::super::api_client::AuthMethod::NoAuth,
764+
)
765+
.unwrap(),
766+
auth: DatabricksAuth::Token("fake".into()),
767+
model: ModelConfig::new_or_fail("databricks-claude-sonnet-4"),
768+
image_format: ImageFormat::OpenAi,
769+
retry_config: RetryConfig::default(),
770+
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
771+
name: "databricks".into(),
772+
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
773+
instance_id: None,
774+
};
775+
776+
let path = provider.get_endpoint_path("databricks-claude-sonnet-4", false);
777+
assert_eq!(
778+
path,
779+
"serving-endpoints/databricks-claude-sonnet-4/invocations"
780+
);
781+
}
782+
783+
// --- Bug-fix: dash-variant model names (gpt-5-4 == gpt-5.4) ---
784+
785+
#[test]
786+
fn gpt_5_4_dash_variant_is_responses_model() {
787+
assert!(
788+
DatabricksProvider::is_responses_model("gpt-5-4"),
789+
"gpt-5-4 (dash variant of gpt-5.4) must route to the Responses API"
790+
);
791+
}
792+
793+
#[test]
794+
fn gpt_5_4_dash_variant_mini_is_responses_model() {
795+
assert!(DatabricksProvider::is_responses_model("gpt-5-4-mini"));
796+
}
797+
798+
#[test]
799+
fn gpt_5_4_dash_variant_nano_is_responses_model() {
800+
assert!(DatabricksProvider::is_responses_model("gpt-5-4-nano"));
801+
}
802+
803+
#[test]
804+
fn gpt_5_4_dash_variant_pro_is_responses_model() {
805+
assert!(DatabricksProvider::is_responses_model("gpt-5-4-pro"));
806+
}
807+
808+
#[test]
809+
fn databricks_prefixed_gpt_5_4_dash_variant_is_responses_model() {
810+
assert!(
811+
DatabricksProvider::is_responses_model("databricks-gpt-5-4"),
812+
"databricks-gpt-5-4 must route to the Responses API"
813+
);
814+
}
815+
816+
#[test]
817+
fn gpt_5_2_dash_variant_pro_is_responses_model() {
818+
assert!(
819+
DatabricksProvider::is_responses_model("gpt-5-2-pro"),
820+
"gpt-5-2-pro (dash variant of gpt-5.2-pro) must route to the Responses API"
821+
);
822+
}
823+
824+
#[test]
825+
fn endpoint_path_routes_dash_variant_gpt_5_4_to_responses() {
826+
let provider = DatabricksProvider {
827+
api_client: super::super::api_client::ApiClient::new(
828+
"https://example.com".to_string(),
829+
super::super::api_client::AuthMethod::NoAuth,
830+
)
831+
.unwrap(),
832+
auth: DatabricksAuth::Token("fake".into()),
833+
model: ModelConfig::new_or_fail("databricks-gpt-5-4"),
834+
image_format: ImageFormat::OpenAi,
835+
retry_config: RetryConfig::default(),
836+
fast_retry_config: RetryConfig::new(0, 0, 1.0, 0),
837+
name: "databricks".into(),
838+
token_cache: std::sync::Arc::new(std::sync::Mutex::new(None)),
839+
instance_id: None,
840+
};
841+
842+
let path = provider.get_endpoint_path("databricks-gpt-5-4-xhigh", false);
843+
assert_eq!(
844+
path, "serving-endpoints/responses",
845+
"databricks-gpt-5-4-xhigh must route to the Responses API, not chat/completions"
846+
);
847+
}
848+
}

crates/goose/src/providers/formats/databricks.rs

Lines changed: 39 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -587,7 +587,7 @@ pub fn create_request(
587587
let last_part = parts.last().unwrap();
588588

589589
match *last_part {
590-
"low" | "medium" | "high" => {
590+
"none" | "low" | "medium" | "high" | "xhigh" => {
591591
let base_name = parts[..parts.len() - 1].join("-");
592592
(base_name, Some(last_part.to_string()))
593593
}
@@ -1073,6 +1073,44 @@ mod tests {
10731073
Ok(())
10741074
}
10751075

1076+
#[test]
1077+
fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> {
1078+
let model_config = ModelConfig {
1079+
model_name: "o3-xhigh".to_string(),
1080+
context_limit: Some(4096),
1081+
temperature: None,
1082+
max_tokens: Some(1024),
1083+
toolshim: false,
1084+
toolshim_model: None,
1085+
fast_model_config: None,
1086+
request_params: None,
1087+
reasoning: None,
1088+
};
1089+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1090+
assert_eq!(request["model"], "o3");
1091+
assert_eq!(request["reasoning_effort"], "xhigh");
1092+
Ok(())
1093+
}
1094+
1095+
#[test]
1096+
fn test_create_request_reasoning_effort_none() -> anyhow::Result<()> {
1097+
let model_config = ModelConfig {
1098+
model_name: "o3-none".to_string(),
1099+
context_limit: Some(4096),
1100+
temperature: None,
1101+
max_tokens: Some(1024),
1102+
toolshim: false,
1103+
toolshim_model: None,
1104+
fast_model_config: None,
1105+
request_params: None,
1106+
reasoning: None,
1107+
};
1108+
let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?;
1109+
assert_eq!(request["model"], "o3");
1110+
assert_eq!(request["reasoning_effort"], "none");
1111+
Ok(())
1112+
}
1113+
10761114
#[test]
10771115
fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> {
10781116
let _guard = env_lock::lock_env([

0 commit comments

Comments (0)